84 lines
3.3 KiB
Python
84 lines
3.3 KiB
Python
import os
|
||
import subprocess
|
||
import json
|
||
|
||
def find_webm_without_txt(root_dir):
    """
    Collect every .webm file under a directory tree that lacks a sibling
    .txt file sharing its base name.

    Args:
        root_dir: The root directory to search within (walked recursively).

    Returns:
        A list of tuples ``(webm_path, size_in_bytes, duration)``, where
        ``duration`` is whatever ``get_video_duration`` returns for the file.
        Files for which an ``OSError`` is raised while gathering this
        information are reported on stdout and left out of the list.
    """
    results = []
    for current_dir, _dirs, names in os.walk(root_dir):
        for name in names:
            # Only .webm files are of interest.
            if not name.endswith('.webm'):
                continue

            # A companion "<base name>.txt" in the same directory means the
            # video is already covered.
            stem, _ext = os.path.splitext(name)
            if os.path.exists(os.path.join(current_dir, stem + '.txt')):
                continue

            webm_path = os.path.join(current_dir, name)
            try:
                # Size is read first, then the duration.
                entry = (
                    webm_path,
                    os.path.getsize(webm_path),
                    get_video_duration(webm_path),
                )
            except OSError as e:
                # Report the failure and skip this file.
                print(f"无法获取文件信息 {webm_path}: {e}")
            else:
                results.append(entry)

    return results
|
||
|
||
def get_video_duration(video_path):
    """
    Return the duration of a video in seconds, as reported by ffprobe.

    Runs ``ffprobe`` on ``video_path`` requesting only the ``format=duration``
    entry in JSON form, then parses that output.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        The duration in seconds as a float, or -1 when ffprobe is not found,
        cannot be launched, exits with a non-zero status, or produces output
        that does not contain a usable duration. A diagnostic message is
        printed for each of these failures instead of raising.
    """
    # Ask ffprobe for nothing but the duration, as JSON.
    cmd = [
        'ffprobe',
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'json',
        video_path,
    ]

    try:
        # Decode explicitly as UTF-8 with replacement so that bytes which are
        # invalid in the locale encoding (e.g. file names echoed on stderr)
        # cannot raise a decode error and hide ffprobe's real diagnostic.
        result = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding='utf-8',
            errors='replace',
        )
    except FileNotFoundError:
        # Must precede the OSError handler: it is an OSError subclass and
        # deserves the more specific "install FFmpeg" hint.
        print("ffprobe命令没有找到。请确保FFmpeg已安装并添加到PATH中。")
        return -1
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # Any other launch failure (e.g. PermissionError) also maps to -1,
        # as the documented contract promises, rather than escaping.
        print(f"处理视频时长时出错 {video_path}: {e}")
        return -1

    if result.returncode != 0:
        print(f"ffprobe错误 {video_path}: {result.stderr.strip()}")
        return -1

    try:
        # Parse the JSON output.
        data = json.loads(result.stdout)
        return float(data['format']['duration'])
    except (json.JSONDecodeError, KeyError, ValueError, TypeError) as e:
        # TypeError covers unexpected JSON shapes such as a null duration or
        # a non-object top level.
        print(f"处理视频时长时出错 {video_path}: {e}")
        return -1
|
||
|
||
if __name__ == "__main__":
    # The videos are looked up in a "video" directory that sits beside the
    # directory containing this script.
    script_dir = os.path.dirname(__file__)
    video_dir = os.path.abspath(os.path.join(script_dir, '..', 'video'))

    if os.path.isdir(video_dir):
        unmatched = find_webm_without_txt(video_dir)
        if not unmatched:
            print("所有 .webm 文件都有对应的 .txt 文件。")
        else:
            print("找到以下没有对应 .txt 文件的 .webm 文件及其大小和时长:")
            # One line per file: path, size in bytes, and duration.
            for file_path, file_size, duration in unmatched:
                # A missing or negative duration is shown as "unknown".
                if duration is None or not duration >= 0:
                    duration_str = "未知"
                else:
                    duration_str = f"{duration:.2f} 秒"
                print(f"{file_path} ({file_size} bytes, {duration_str})")
    else:
        print(f"错误:目录 '{video_dir}' 不存在。")
|