240 lines
9.9 KiB
Python
240 lines
9.9 KiB
Python
import os
|
||
import json
|
||
from dotenv import load_dotenv
|
||
import base64
|
||
from openai import OpenAI
|
||
from pathlib import Path
|
||
import concurrent.futures
|
||
from typing import Dict, Any
|
||
from datetime import datetime
|
||
|
||
# Load environment variables
|
||
# Import-time side effect: runs before analyze_images looks up OPENAI_API_KEY
# and OPENAI_API_BASE_URL through os.getenv.
load_dotenv()
|
||
|
||
# Model identifier passed as `model` to the chat-completions call in analyze_images.
MODEL_NAME = "gpt-4o-mini"
|
||
# MODEL_NAME = "UI-TARS-72B-DPO"
|
||
|
||
def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 text string.

    :param image_path: path of the file to read (opened in binary mode)
    :return: base64 encoding of the file's bytes, decoded as UTF-8 text
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
|
||
|
||
def analyze_images(image_paths, prompt):
    """Send a text prompt plus several images to the chat model in one request.

    :param image_paths: list of image file paths; each is embedded as a
        ``data:image/png;base64,...`` URL
    :param prompt: text instruction placed before the images
    :return: the content of the first choice's message, or, if the API call
        raises, a string starting with ``发生错误: `` followed by the error text
    """
    # The base URL is optional and allows pointing at another compatible service.
    client = OpenAI(
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_API_BASE_URL"),
    )

    # A single user message: the text part first, then one part per image.
    content_parts = [{"type": "text", "text": prompt}]
    content_parts.extend(
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{encode_image(path)}"},
        }
        for path in image_paths
    )
    messages = [{"role": "user", "content": content_parts}]

    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=4000,
            temperature=0.5,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as exc:
        # Failures are reported in-band as text; the caller stores the string.
        return f"发生错误: {str(exc)}"
|
||
|
||
def process_path_meta(meta, screenshot_dir="screenshots"):
    """Build the screenshot path list and collect the per-step fields of one path.

    Every chain ID except the last maps to ``<dir>/<id>_<childNum>.png``; the
    last one always maps to ``<dir>/<id>_full.png``.

    :param meta: path metadata with the keys ``chainIDs``, ``chainChildNum``,
        ``chainUrls``, ``chainTexts`` and ``chainViewportBoundingBoxes``
    :param screenshot_dir: directory prefix for the image paths
    :return: dict with the keys ``image_paths``, ``urls``, ``text`` and
        ``boundingbox``
    :raises ValueError: if ``chainChildNum`` has no entry for one of the
        non-final chain IDs
    """
    chain_ids = meta["chainIDs"]
    child_nums = meta["chainChildNum"]
    last_index = len(chain_ids) - 1

    # Index explicitly instead of zip(): zip() stops at the shorter list, which
    # silently dropped the final "_full" screenshot when chainChildNum carries
    # no entry for the last page.
    if len(child_nums) < last_index:
        raise ValueError(
            f"chainChildNum has {len(child_nums)} entries but "
            f"{last_index} are needed for {len(chain_ids)} chain IDs"
        )

    image_paths = []
    for idx, chain_id in enumerate(chain_ids):
        suffix = "full" if idx == last_index else child_nums[idx]
        image_paths.append(f"{screenshot_dir}/{chain_id}_{suffix}.png")

    return {
        "image_paths": image_paths,
        "urls": meta["chainUrls"],
        "text": meta["chainTexts"],
        "boundingbox": meta["chainViewportBoundingBoxes"],
    }
|
||
|
||
# Prompt sent together with the screenshots. Literal braces of the example
# JSON are doubled because the template is filled in with str.format().
# NOTE(review): the example's "action_description" list ends with a trailing
# comma, which is not valid JSON; the text is left untouched here so that the
# prompt the model receives does not change - confirm whether to fix it.
_PATH_ANALYSIS_PROMPT = r"""You are a GUI agent.

根据给定的{urls_length}个网页截图,总结网页截图完成了一个什么样的任务,
从第一个到倒数第二个网页用户点击的按钮文字text list分别是{text},最后一个网页是最终到达目的页面。
page_description中描述了用户在每个网页中看到的内容。
action_description中描述了用户在每个网页中点击的元素,这里元素的文字(用[]包裹)要和前面提供的text list是对应的,还要描述元素所处周围环境。
task_summaries中提炼轨迹可能对应的用户完成任务,任务内容要无歧义可以验证的,并且要和page_description和action_description相匹配,task_summaries中要包含不少于三个任务。

示例输出
{{
"page_description": [
"这看起来是一个Granfana首页界面,左边的导航栏已经展开,并且导航栏不是首屏,有滑动痕迹",
"这看起来是点击了左侧导航栏的Probes选项后显示的Probes列表页面,截图最上面显示当前路径Home > Testing & synthetics > Synthetics > Probes 。而且列表页中显示了多个探测器,每个探测器有名称、版本和有一个View的按钮。页面看起来不是首屏,有滑动痕迹",
"这是最终到达探测器详情页,标题是Viewing public probe Tokyo(APAC),页面中还显示了该探测的的Status,Reachability,Location information,Version, Last offline, Last modified, Region等信息。"
],
"action_description": [
"点击了[Probes]选项。周围环境:Probes选择在导航栏三级菜单,一级菜单是Testing & synthetics,二级菜单是Synthetics,三级菜单有hecks,Probes和Alerts三个选项,我点击了Probes选项。",
"点击了[Tokyo(APAC)]文字标题。周围环境:Tokyo(APAC)探测器条目在Probes列表页面中,每个探测器有名称、版本和有一个View的按钮",
],
"task_summaries": [
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的状态。", "answer": "Online" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Reachability。", "answer": "100.0%" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Region。", "answer": "APAC" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Version。", "answer": "v0.10.5-0-g9201a28" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Last offline。", "answer": "March 18, 2025 at 05:23 AM" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Last modified。", "answer": "March 04, 2025 at 07:17 AM" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Location information的Lattitude。", "answer": "35.6762" }},
{{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Location information的Longitude。", "answer": "139.6503" }}
]
}}

"""


def _brace_span(text: str) -> str:
    """Return the text from the first '{' to the last '}', or *text* unchanged."""
    start = text.find("{")
    end = text.rfind("}")
    if -1 < start < end:
        return text[start:end + 1]
    return text


def _json_candidates(raw: str) -> list:
    """List the substrings of a model reply that may hold the JSON object.

    The brace span of the whole reply comes first. The brace span of the text
    after the first "assistant" marker is only a fallback, so a reply whose
    JSON content merely mentions the word "assistant" is no longer cut in half.
    """
    text = raw.strip()
    candidates = [_brace_span(text)]
    _, marker, tail = text.partition("assistant")
    if marker:
        fallback = _brace_span(tail.strip())
        if fallback not in candidates:
            candidates.append(fallback)
    return candidates


def process_single_path(url: str, meta: Dict[str, Any], path_index: int) -> Dict[str, Any]:
    """Analyze the screenshots of one path and store the outcome in *meta*.

    :param url: URL the path belongs to; used only in log output
    :param meta: path metadata as accepted by process_path_meta; it is
        modified in place. ``None`` is skipped.
    :param path_index: index of the path; used only in log output
    :return: ``None`` when *meta* is ``None``; otherwise *meta* with the keys
        ``raw_result``, ``page_description``, ``action_description`` and
        ``task_summaries`` set. When the reply cannot be parsed or processed,
        those three description keys hold error text instead.
    """
    # Skip empty entries.
    if meta is None:
        return None

    processed_data = process_path_meta(meta)
    prompt = _PATH_ANALYSIS_PROMPT.format(
        urls_length=len(processed_data["urls"]),
        text=processed_data["text"],
    )

    print(f"Processing path {path_index} for URL: {url}")
    result = analyze_images(processed_data["image_paths"], prompt)
    print(f" path {path_index} for url {url} result: {result}")

    try:
        meta["raw_result"] = result
        candidates = _json_candidates(result)
        first_error = None
        for candidate in candidates:
            try:
                result_json = json.loads(candidate)
            except json.JSONDecodeError as e:
                if first_error is None:
                    first_error = e
                continue
            meta["page_description"] = result_json.get("page_description", "未能获取页面描述")
            meta["action_description"] = result_json.get("action_description", "未能获取动作描述")
            meta["task_summaries"] = result_json.get("task_summaries", "未能获取任务摘要")
            break
        else:
            # No candidate parsed: keep the extracted text for later inspection.
            print(f"JSON parsing error for URL {url}: {str(first_error)}")
            meta["page_description"] = "解析错误:无效的JSON格式"
            meta["action_description"] = f"原始响应:{candidates[0]}"
            meta["task_summaries"] = f"原始响应:{candidates[0]}"
    except Exception as e:
        # Covers a reply that is not a string or JSON that is not an object.
        print(f"Error processing result for URL {url}: {str(e)}")
        meta["page_description"] = "处理错误"
        meta["action_description"] = f"错误信息:{str(e)}"
        meta["task_summaries"] = f"错误信息:{str(e)}"

    return meta
|
||
|
||
def _save_json(data, output_path):
    """Write *data* to *output_path* as indented UTF-8 JSON."""
    with open(output_path, 'w', encoding='utf-8') as f_out:
        json.dump(data, f_out, ensure_ascii=False, indent=2)


def update_json_with_analysis(json_path: str, max_workers: int = 4):
    """Analyze every path in a JSON file in parallel and save an annotated copy.

    Each non-null entry of every URL's ``shortestPathsMeta`` list is passed to
    process_single_path on a thread pool. The result replaces the entry in the
    loaded data, which is written to a new, timestamped file next to the
    input; the input file itself is never written.

    :param json_path: path of the input JSON file
    :param max_workers: number of worker threads
    :return: path of the output file
    """
    import copy  # local import: not among the file's top-level imports

    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    tasks = [
        (url, meta, i)
        for url, url_data in data.items()
        for i, meta in enumerate(url_data.get("shortestPathsMeta", []))
        if meta is not None
    ]

    # Derive the output name from the file name only. A plain
    # str.replace('.json', ...) is a no-op for a path without ".json" (the
    # output would then overwrite the input) and also rewrites ".json" inside
    # directory names.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    source = Path(json_path)
    base_name = source.stem if source.suffix.lower() == ".json" else source.name
    output_path = str(source.with_name(f"{base_name}_with_analysis_{timestamp}.json"))

    processed_count = 0

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Workers get private copies: process_single_path adds keys to the
        # dict it receives, and doing that on the shared data while this
        # thread serializes a checkpoint can fail with "dictionary changed
        # size during iteration".
        future_to_task = {
            executor.submit(process_single_path, url, copy.deepcopy(meta), i): (url, i)
            for url, meta, i in tasks
        }

        for future in concurrent.futures.as_completed(future_to_task):
            url, path_index = future_to_task[future]
            try:
                result = future.result()
            except Exception as e:
                print(f"Error processing path {path_index} for URL {url}: {str(e)}")
                continue
            if result is None:
                continue

            data[url]["shortestPathsMeta"][path_index] = result
            processed_count += 1

            # Checkpoint after every 10 processed entries.
            if processed_count % 10 == 0:
                try:
                    _save_json(data, output_path)
                except OSError as e:
                    # A failed checkpoint must not abort the remaining work.
                    print(f"Failed to write checkpoint to {output_path}: {str(e)}")
                else:
                    print(f"已处理{processed_count}个条目,保存到{output_path}")

    # Final save of everything.
    _save_json(data, output_path)
    print(f"全部处理完成,最终保存到{output_path}")
    return output_path
|
||
|
||
def main(json_path: str = "path/processed_3.json", max_workers: int = 2):
    """Run the path analysis for one JSON file.

    :param json_path: input JSON file handed to update_json_with_analysis
    :param max_workers: number of concurrent workers; adjust as needed
    """
    update_json_with_analysis(json_path, max_workers=max_workers)
|
||
|
||
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|