import os
import subprocess
import json


def find_webm_without_txt(root_dir):
    """Find .webm files that have no sibling .txt file with the same base name.

    Walks ``root_dir`` recursively. For every file whose extension is
    ``.webm`` (matched case-insensitively), checks whether
    ``<base name>.txt`` exists in the same directory; if it does not, the
    file is reported together with its size and duration.

    Args:
        root_dir: The root directory to search within.

    Returns:
        A list of ``(webm_path, size_in_bytes, duration_in_seconds)`` tuples.
        The duration is ``-1`` when it could not be determined (see
        ``get_video_duration``). Directories and file names are visited in
        sorted order, so the result order is deterministic.
    """
    found_files = []
    for subdir, dirs, files in os.walk(root_dir):
        # Sorting in place makes os.walk descend in a stable order instead
        # of whatever order the filesystem happens to return.
        dirs.sort()
        for file in sorted(files):
            base_name, extension = os.path.splitext(file)
            # Compare case-insensitively so "clip.WEBM" is not overlooked.
            if extension.lower() != '.webm':
                continue

            txt_path = os.path.join(subdir, base_name + '.txt')
            if os.path.exists(txt_path):
                continue

            webm_path = os.path.join(subdir, file)
            try:
                file_size = os.path.getsize(webm_path)
                duration = get_video_duration(webm_path)
            except OSError as e:
                # The file may have vanished or be unreadable; report it and
                # keep scanning rather than aborting the whole walk.
                print(f"无法获取文件信息 {webm_path}: {e}")
            else:
                found_files.append((webm_path, file_size, duration))
    return found_files


def get_video_duration(video_path):
    """Return the duration of a video in seconds, as reported by ffprobe.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        The duration as a float, or ``-1`` if ffprobe is not installed,
        exits with a non-zero status, or produces output from which no
        numeric duration can be parsed. A diagnostic message is printed in
        each failure case.
    """
    # Ask ffprobe for the container-level duration only, as JSON.
    cmd = [
        'ffprobe',
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'json',
        video_path,
    ]
    try:
        result = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        if result.returncode != 0:
            print(f"ffprobe错误 {video_path}: {result.stderr.strip()}")
            return -1

        data = json.loads(result.stdout)
        return float(data['format']['duration'])
    except (subprocess.SubprocessError, json.JSONDecodeError,
            KeyError, ValueError, TypeError) as e:
        # TypeError covers a null duration or an unexpected JSON shape,
        # which float() / subscripting would otherwise let escape.
        print(f"处理视频时长时出错 {video_path}: {e}")
        return -1
    except FileNotFoundError:
        # Raised by subprocess.run when the ffprobe executable is missing.
        print("ffprobe命令没有找到。请确保FFmpeg已安装并添加到PATH中。")
        return -1


def main():
    """Report .webm files lacking a .txt companion under ``../video``.

    The directory is resolved relative to this script's location.
    """
    video_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..', 'video')
    )

    if not os.path.isdir(video_dir):
        print(f"错误:目录 '{video_dir}' 不存在。")
        return

    missing_txt_files = find_webm_without_txt(video_dir)
    if not missing_txt_files:
        print("所有 .webm 文件都有对应的 .txt 文件。")
        return

    print("找到以下没有对应 .txt 文件的 .webm 文件及其大小和时长:")
    for file_path, file_size, duration in missing_txt_files:
        # A negative duration is the "could not determine" sentinel.
        if duration is not None and duration >= 0:
            duration_str = f"{duration:.2f} 秒"
        else:
            duration_str = "未知"
        print(f"{file_path} ({file_size} bytes, {duration_str})")


if __name__ == "__main__":
    main()