crawlee/misc/trajectory_analysis_v17.py
2025-04-23 12:14:50 +08:00

240 lines
9.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import json
from dotenv import load_dotenv
import base64
from openai import OpenAI
from pathlib import Path
import concurrent.futures
from typing import Dict, Any
from datetime import datetime

# Load environment variables (OPENAI_API_KEY / OPENAI_API_BASE_URL) from a .env file.
load_dotenv()

# Vision-capable chat model used to analyze screenshot trajectories.
MODEL_NAME = "gpt-4o-mini"
# MODEL_NAME = "UI-TARS-72B-DPO"  # alternative model (for OpenAI-compatible endpoints)
def encode_image(image_path):
    """Return the contents of an image file as a base64-encoded UTF-8 string."""
    raw_bytes = Path(image_path).read_bytes()
    return base64.b64encode(raw_bytes).decode('utf-8')
def analyze_images(image_paths, prompt):
    """Send a text prompt plus one or more PNG screenshots to the chat model.

    :param image_paths: list of PNG file paths to attach as data-URL images.
    :param prompt: textual instruction sent alongside the images.
    :return: the model's reply text, or an error string if the call fails.
    """
    # Build the API client; base_url may point at an OpenAI-compatible service.
    client = OpenAI(
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_API_BASE_URL"),
    )
    # Assemble a single user message: the prompt first, then every screenshot
    # inlined as a base64 data URL.
    content = [{"type": "text", "text": prompt}]
    content.extend(
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{encode_image(path)}"},
        }
        for path in image_paths
    )
    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": content}],
            max_tokens=4000,
            temperature=0.5,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best-effort: surface the failure as a string instead of raising,
        # so one bad path does not abort the whole batch.
        return f"发生错误: {str(e)}"
def process_path_meta(meta, screenshots_dir="screenshots"):
    """Build the screenshot path list and companion info for one crawl path.

    :param meta: path metadata dict with parallel lists ``chainIDs``,
        ``chainChildNum``, ``chainUrls``, ``chainTexts`` and
        ``chainViewportBoundingBoxes``.
    :param screenshots_dir: directory prefix for screenshot files; defaults
        to ``"screenshots"`` so existing callers are unaffected.
    :return: dict with keys ``image_paths``, ``urls``, ``text``, ``boundingbox``.
    """
    image_paths = []
    last_index = len(meta["chainIDs"]) - 1
    # NOTE(review): zip truncates to the shorter of the two lists; if
    # chainChildNum is shorter than chainIDs the "full" final screenshot is
    # never emitted — confirm the two lists are expected to be equal length.
    for idx, (chain_id, child_num) in enumerate(zip(meta["chainIDs"], meta["chainChildNum"])):
        # Screenshot naming: "<ID>_<childNum>.png", except the final page of
        # the chain, which is stored as "<ID>_full.png".
        suffix = "full" if idx == last_index else child_num
        image_paths.append(f"{screenshots_dir}/{chain_id}_{suffix}.png")
    return {
        "image_paths": image_paths,
        "urls": meta["chainUrls"],
        "text": meta["chainTexts"],
        "boundingbox": meta["chainViewportBoundingBoxes"],
    }
def process_single_path(url: str, meta: Dict[str, Any], path_index: int) -> Dict[str, Any]:
    """Analyze one navigation path: send its screenshots to the model and
    attach the parsed page/action descriptions and task summaries to ``meta``.

    :param url: source URL owning this path (used only for logging).
    :param meta: path metadata dict; mutated in place with the results.
    :param path_index: index of the path within the URL's path list (logging only).
    :return: the updated ``meta`` dict, or ``None`` when ``meta`` is ``None``.
    """
    # Skip empty entries.
    if meta is None:
        return None
    # Resolve screenshot paths plus the urls/texts companion info.
    processed_data = process_path_meta(meta)
    # Prompt template (Chinese): asks the model to describe each page, the
    # clicked element on each page, and to distill at least three verifiable
    # tasks, returned as a JSON object. ``{{``/``}}`` escape literal braces
    # for str.format; the example output below is part of the prompt.
    prompt_template = r"""You are a GUI agent.
根据给定的{urls_length}个网页截图,总结网页截图完成了一个什么样的任务,
从第一个到倒数第二个网页用户点击的按钮文字text list分别是{text},最后一个网页是最终到达目的页面。
page_description中描述了用户在每个网页中看到的内容。
action_description中描述了用户在每个网页中点击的元素这里元素的文字(用[]包裹)要和前面提供的text list是对应的还要描述元素所处周围环境。
task_summaries中提炼轨迹可能对应的用户完成任务任务内容要无歧义可以验证的并且要和page_description和action_description相匹配task_summaries中要包含不少于三个任务。
示例输出
{{
"page_description": [
"这看起来是一个Granfana首页界面左边的导航栏已经展开并且导航栏不是首屏有滑动痕迹",
"这看起来是点击了左侧导航栏的Probes选项后显示的Probes列表页面截图最上面显示当前路径Home > Testing & synthetics > Synthetics > Probes 。而且列表页中显示了多个探测器每个探测器有名称、版本和有一个View的按钮。页面看起来不是首屏有滑动痕迹",
"这是最终到达探测器详情页标题是Viewing public probe Tokyo(APAC)页面中还显示了该探测的的StatusReachabilityLocation informationVersion, Last offline, Last modified, Region等信息。"
],
"action_description": [
"点击了[Probes]选项。周围环境Probes选择在导航栏三级菜单一级菜单是Testing & synthetics二级菜单是Synthetics三级菜单有hecksProbes和Alerts三个选项我点击了Probes选项。",
"点击了[Tokyo(APAC)]文字标题。周围环境Tokyo(APAC)探测器条目在Probes列表页面中每个探测器有名称、版本和有一个View的按钮",
],
"task_summaries": [
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的状态。", "answer": "Online" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Reachability。", "answer": "100.0%" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Region。", "answer": "APAC" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Version。", "answer": "v0.10.5-0-g9201a28" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Last offline。", "answer": "March 18, 2025 at 05:23 AM" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Last modified。", "answer": "March 04, 2025 at 07:17 AM" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Location information的Lattitude。", "answer": "35.6762" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Location information的Longitude。", "answer": "139.6503" }}
]
}}
"""
    # Fill in the screenshot count and the clicked-button text list.
    # NOTE(review): ``urls=`` is passed but the template has no ``{urls}``
    # placeholder — it is a harmless extra keyword to str.format.
    prompt = prompt_template.format(
        urls_length=len(processed_data["urls"]),
        urls=processed_data["urls"],
        text=processed_data["text"]
    )
    print(f"Processing path {path_index} for URL: {url}")
    # Call the vision model with all screenshots plus the prompt.
    result = analyze_images(processed_data["image_paths"], prompt)
    print(f" path {path_index} for url {url} result: {result}")
    try:
        meta["raw_result"] = result
        # Clean up and normalize the model's JSON-ish response string.
        parsed_result = result.strip()
        # Drop any leading chat transcript before an "assistant" marker.
        if "assistant" in parsed_result.lower():
            parsed_result = parsed_result.split("assistant", 1)[-1].strip()
        # Keep only the substring between the first '{' and the last '}'.
        start = parsed_result.find('{')
        end = parsed_result.rfind('}')
        if start != -1 and end != -1:
            parsed_result = parsed_result[start:end+1]
        # Try to parse as JSON; on failure store the raw response instead.
        try:
            result_json = json.loads(parsed_result)
            meta["page_description"] = result_json.get("page_description", "未能获取页面描述")
            meta["action_description"] = result_json.get("action_description", "未能获取动作描述")
            meta["task_summaries"] = result_json.get("task_summaries", "未能获取任务摘要")
        except json.JSONDecodeError as e:
            print(f"JSON parsing error for URL {url}: {str(e)}")
            meta["page_description"] = "解析错误无效的JSON格式"
            meta["action_description"] = f"原始响应:{parsed_result}"
            meta["task_summaries"] = f"原始响应:{parsed_result}"
    except Exception as e:
        print(f"Error processing result for URL {url}: {str(e)}")
        meta["page_description"] = "处理错误"
        meta["action_description"] = f"错误信息:{str(e)}"
        meta["task_summaries"] = f"错误信息:{str(e)}"
    return meta
def update_json_with_analysis(json_path: str, max_workers: int = 4):
    """Read the crawl JSON, analyze every non-empty path in parallel via
    ``process_single_path``, and write results to a new timestamped file.

    :param json_path: path to the input JSON file (a mapping of URL -> data
        containing a ``shortestPathsMeta`` list).
    :param max_workers: thread-pool size for concurrent API calls.
    """
    # Load the input JSON.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Collect one task per non-empty path entry.
    tasks = [
        (url, meta, i)
        for url, url_data in data.items()
        for i, meta in enumerate(url_data.get("shortestPathsMeta", []))
        if meta is not None
    ]

    # Derive the output file name. os.path.splitext only strips the real
    # extension, so — unlike the previous str.replace('.json', ...) — the
    # input file is never silently overwritten when json_path has no
    # ".json" suffix, and a ".json" embedded mid-path is left alone.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    root, _ = os.path.splitext(json_path)
    output_path = f"{root}_with_analysis_{timestamp}.json"

    processed_count = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all tasks up front.
        future_to_task = {
            executor.submit(process_single_path, url, meta, i): (url, i)
            for url, meta, i in tasks
        }
        # Merge results back into `data` as futures complete.
        for future in concurrent.futures.as_completed(future_to_task):
            url, path_index = future_to_task[future]
            try:
                result = future.result()
                if result is not None:
                    data[url]["shortestPathsMeta"][path_index] = result
                    processed_count += 1
                    # Checkpoint every 10 processed entries so partial
                    # progress survives a crash or interruption.
                    if processed_count % 10 == 0:
                        with open(output_path, 'w', encoding='utf-8') as f_out:
                            json.dump(data, f_out, ensure_ascii=False, indent=2)
                        print(f"已处理{processed_count}个条目,保存到{output_path}")
            except Exception as e:
                print(f"Error processing path {path_index} for URL {url}: {str(e)}")

    # Final save of the fully annotated data.
    with open(output_path, 'w', encoding='utf-8') as f_out:
        json.dump(data, f_out, ensure_ascii=False, indent=2)
    print(f"全部处理完成,最终保存到{output_path}")
def main():
    """Entry point: annotate the crawl JSON with trajectory analysis."""
    # max_workers can be raised if the API endpoint tolerates more concurrency.
    update_json_with_analysis("path/processed_3.json", max_workers=2)


if __name__ == "__main__":
    main()