格式工厂合并 mp4 和 srt,并利用 python 按照字幕剪辑视频,将其分割为若干小段

一、视频合并
1. 选择转换为 mp4,将视频导入格式工厂
2. 调整字幕样式
文章图片

二、python
1.可能用到的命令: pip install moviepy
2.main.py

import os

import cut_srt
import cut_video

if __name__ == '__main__':
    my_video_path = r"D:\Videos\Star Wars\Star Wars 9 The.Rise.of.Skywalker.2019.mp4"
    # Folder holding the subtitle (.srt) file(s) for the video above.
    my_srt_path = r"D:\Documents"

    # Convert every .srt file in the target folder to its formatted text form.
    cut_srt.srt_to_format_txt(my_srt_path)

    # cut_video_by_srt opens its second argument as a subtitle *file*, so the
    # folder itself must not be passed; hand it each .srt file path instead.
    for srt_name in os.listdir(my_srt_path):
        if srt_name.endswith(".srt"):
            cut_video.cut_video_by_srt(my_video_path, os.path.join(my_srt_path, srt_name))

3.cut_srt.py
import os
import re

# Punctuation removed by validate_title. The full-width forms of ? ! , are
# listed next to their ASCII counterparts so that punctuation typed with a
# Chinese input method is stripped as well.
_PUNCTUATION_PATTERN = re.compile(r"[/\\:*?\"<>|.,!'\-…“”?!,]")


def check_contain_chinese(check_str):
    """Return True if check_str contains a character in U+4E00..U+9FFF."""
    return any(u'\u4e00' <= ch <= u'\u9fff' for ch in check_str)


def validate_title(str_):
    """Return str_ lower-cased, with punctuation removed and spaces normalized.

    Leading/trailing whitespace is stripped and every run of spaces is
    collapsed to a single space.
    """
    new_title = _PUNCTUATION_PATTERN.sub("", str_)
    return re.sub(' +', ' ', new_title.lower().strip())


def get_format_sequences(srt_path_):
    """Parse a subtitle file into a list of four-item entries.

    Each returned entry is [index, time range, subtitle line 1, subtitle
    line 2]. Entries that do not consist of exactly four non-empty lines
    are skipped.
    """
    with open(srt_path_, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    sequences = []
    # Entries are separated by blank lines; tolerate separator lines that
    # contain only whitespace.
    for block in re.split(r'\n\s*\n', content):
        lines = [line.strip() for line in block.split('\n') if line.strip()]
        if len(lines) == 4:
            sequences.append(lines)
    return sequences


def str2sec(x):
    """Convert an 'H:M:S' string to seconds, e.g. '1:20:12' -> 4812."""
    h, m, s = x.split(':')
    return int(h) * 3600 + int(m) * 60 + int(s)


def get_start_end_time(str_):
    """Return (start, end) in whole seconds for an SRT time range.

    Input looks like '02:09:53,440 --> 02:09:55,740'. The millisecond part
    after the comma is discarded.
    """
    start_time_, end_time_ = str_.strip().split("-->")
    start_time_ = start_time_.split(",")[0].strip()
    end_time_ = end_time_.split(",")[0].strip()
    return str2sec(start_time_), str2sec(end_time_)


def srt_to_format_txt(srt_path):
    """Write a .csv next to every .srt file found in the folder srt_path.

    Each output line is
    'movie name,running number,subtitle index,English text,Chinese text'.
    Files without any usable four-line entry are skipped.
    """
    for fileName in os.listdir(srt_path):
        if not fileName.endswith(".srt"):
            continue
        print(fileName)
        file_path = os.path.join(srt_path, fileName)
        sequences = get_format_sequences(file_path)
        if not sequences:
            # Nothing to export; also avoids indexing into an empty list below.
            continue

        # Decide which of the two subtitle lines is Chinese by looking at the
        # first text line of the first entry.
        if check_contain_chinese(sequences[0][2]):
            en_position = 3
            ch_position = 2
        else:
            en_position = 2
            ch_position = 3

        # Only the extension is dropped / swapped, never an inner ".srt".
        base_path, _ = os.path.splitext(file_path)
        movie_name = os.path.splitext(fileName)[0]

        # File modes for reference:
        #   r  - read only, pointer at the start of the file
        #   rb - read only, binary
        #   w  - write only; overwrites an existing file, creates a missing one
        #   wb - same as w, binary
        #   a  - append; pointer at the end of an existing file, creates a
        #        missing one
        #   ab - same as a, binary
        with open(base_path + ".csv", "w", encoding='utf-8-sig') as f:
            for count, sequence in enumerate(sequences, start=1):
                en = sequence[en_position]
                ch = sequence[ch_position]
                count_format = "{:05d}".format(count)
                sentence_id = sequence[0]
                # Commas would break the comma-separated line, so they are
                # replaced by spaces in the English text.
                en_format = re.sub(' +', ' ', en.replace("- ", " ").replace(",", " ").strip())
                ch_format = validate_title(ch)
                line = movie_name + "," + count_format + "," + sentence_id + "," + en_format + "," + ch_format
                print(line)
                f.write(line + "\n")

4.cut_video.py
import os
import time

from moviepy.video.io.VideoFileClip import VideoFileClip

from cut_srt import get_format_sequences, get_start_end_time


def cut_video_by_start_end(video_path_, save_file_path_, my_start, my_end, save_name):
    """Write the [my_start, my_end] section of a video to its own file.

    Args:
        video_path_: path of the source video.
        save_file_path_: output location prefix; save_name is appended to it
            verbatim, so a directory must end with a path separator.
        my_start: start of the section in seconds; values below 0 are
            clamped to 0.
        my_end: end of the section in seconds; values beyond the video
            duration are clamped to the duration.
        save_name: file name of the clip to write.
    """
    video = VideoFileClip(video_path_)
    try:
        # A negative start is interpreted by moviepy as an offset from the
        # END of the clip, which is never what the padded subtitle time means.
        start = max(0, my_start)
        end = my_end
        if video.duration is not None:
            end = min(end, video.duration)
        # moviepy 2.x renamed subclip() to subclipped(); use whichever exists.
        make_subclip = getattr(video, "subclipped", None) or video.subclip
        clip = make_subclip(start, end)
        clip.write_videofile(save_file_path_ + save_name, fps=24, logger=None)
    finally:
        # Release the reader even when cutting or encoding fails.
        video.close()


def cut_video_by_srt(video_path, srt_path):
    """Cut a video into one clip per subtitle entry of an .srt file.

    Clips are written to a folder named after the video (its path without
    the extension) as 00001.mp4, 00002.mp4, ... Each clip is padded by two
    seconds before and after the subtitle time range. If the folder already
    contains files, work resumes from the last existing one, which is cut
    again.

    Args:
        video_path: path of the source video.
        srt_path: path of the subtitle FILE (not a folder).
    """
    # Folder that receives the clips; the trailing separator is required
    # because cut_video_by_start_end appends the file name directly.
    save_file_path = os.path.splitext(video_path)[0] + os.sep
    os.makedirs(save_file_path, exist_ok=True)

    # Resume support: with N files present, restart at clip number N.
    existing = len(os.listdir(save_file_path))
    first_number = max(existing, 1)
    start_index = first_number - 1

    sequences = get_format_sequences(srt_path)
    total = len(sequences)
    srt_name = os.path.splitext(os.path.basename(srt_path))[0]
    my_time = time.time()
    for count, sequence in enumerate(sequences[start_index:], start=first_number):
        file_name = "{:05d}".format(count) + ".mp4"
        print(srt_name + "共" + str(total) + "个,当前:" + file_name
              + ", 当前进度:" + "{:<.2f}".format((count / total) * 100) + "%" + ","
              + " 耗时:" + "{:<.2f}".format(time.time() - my_time) + "s")
        start_time, end_time = get_start_end_time(sequence[1])
        cut_video_by_start_end(video_path, save_file_path, start_time - 2, end_time + 2, file_name)

    推荐阅读