trace_synthesis/toy/search.py
yuyr a84d51a101 1. 增加r1生成综合策略代码和输出;
2. 增加tasks;
3. 增加analysis部分,对策略进行归纳分类,然后进行评测。
2025-04-17 17:40:15 +08:00

84 lines
3.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import subprocess
import json
def find_webm_without_txt(root_dir):
    """
    Find every .webm file under ``root_dir`` that lacks a sibling .txt file.

    The tree is walked recursively. A .webm file is reported when no file
    with the same base name and a ``.txt`` extension exists in the same
    directory. Directories and files are visited in sorted order so the
    result order is stable from run to run.

    Args:
        root_dir: The root directory to search within.

    Returns:
        A list of ``(webm_path, size_in_bytes, duration)`` tuples, where
        ``duration`` is the value returned by ``get_video_duration`` for
        that file. Files whose information cannot be read because of an
        ``OSError`` are reported on stdout and left out of the result.
    """
    found_files = []
    for subdir, dirs, files in os.walk(root_dir):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.webm'):
                continue
            webm_path = os.path.join(subdir, file)
            txt_path = os.path.join(subdir, os.path.splitext(file)[0] + '.txt')
            if os.path.exists(txt_path):
                continue
            try:
                # File size in bytes.
                file_size = os.path.getsize(webm_path)
                # Video duration as reported by the helper.
                duration = get_video_duration(webm_path)
            except OSError as e:
                # Best effort: report the unreadable file and keep scanning.
                print(f"无法获取文件信息 {webm_path}: {e}")
                continue
            found_files.append((webm_path, file_size, duration))
    return found_files
def get_video_duration(video_path):
    """
    Return the duration of a video as reported by ffprobe, or -1 on failure.

    ffprobe is asked for the container-level ``format=duration`` entry in
    JSON form, and that value is converted to a float.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        The duration as a float, or -1 when ffprobe is missing, cannot be
        executed, exits with a non-zero status, or produces output that
        cannot be parsed. A diagnostic message is printed in each of these
        cases; no exception is raised for them.
    """
    # ffprobe command that prints only the container duration as JSON.
    cmd = [
        'ffprobe',
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'json',
        video_path
    ]
    try:
        result = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        if result.returncode != 0:
            print(f"ffprobe错误 {video_path}: {result.stderr.strip()}")
            return -1
        # Parse the JSON output.
        data = json.loads(result.stdout)
        return float(data['format']['duration'])
    except FileNotFoundError:
        # Must precede the OSError handler below: the executable is absent.
        print("ffprobe命令没有找到。请确保FFmpeg已安装并添加到PATH中。")
        return -1
    except (OSError, subprocess.SubprocessError, json.JSONDecodeError,
            KeyError, TypeError, ValueError) as e:
        # OSError covers an ffprobe that exists but cannot be executed;
        # TypeError covers JSON whose shape is not the expected mapping.
        print(f"处理视频时长时出错 {video_path}: {e}")
        return -1
if __name__ == "__main__":
    # Script entry point: scan the "video" directory that sits beside this
    # file's parent folder and list .webm files lacking a matching .txt.
    video_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'video'))
    if os.path.isdir(video_dir):
        missing_txt_files = find_webm_without_txt(video_dir)
        if not missing_txt_files:
            print("所有 .webm 文件都有对应的 .txt 文件。")
        else:
            print("找到以下没有对应 .txt 文件的 .webm 文件及其大小和时长:")
            # One line per file: path, size in bytes, and duration.
            for file_path, file_size, duration in missing_txt_files:
                # A missing or negative duration is shown as the "unknown" label.
                if duration is not None and duration >= 0:
                    duration_str = f"{duration:.2f}"
                else:
                    duration_str = "未知"
                print(f"{file_path} ({file_size} bytes, {duration_str})")
    else:
        print(f"错误:目录 '{video_dir}' 不存在。")