初始化

2025-04-23 12:14:50 +08:00 · 2025-04-23 12:14:50 +08:00 · b36a646548
commit b36a646548
50 changed files with 465378 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,21 @@
 node_modules
 backup
 screenshots
 storage
 pages
 path
 crawlee
 axtrees
 .env
 .env.*
--- a/README.md
+++ b/README.md
@ -0,0 +1,35 @@
 # 1. 运行环境
 安装nodejs v18.7.1
 # 2. 运行数据飞轮
 ```bash
 sh run_crawler.sh
 ```
 - 修改入口URL：修改crawl_grafana_v18.js中所有play.grafana.org的网址。
 - 停止任务：目前程序不会自动停止，需要手工杀进程；程序维持一个任务队列，队列为空就不再继续爬。
 # 3. 产出物
 运行数据飞轮以后，会自动创建axtrees, pages, path, screenshots, storage等目录：
 - storage：（运行状态数据）crawlee框架运行过程记录任务。
   - 其中`storage/key_value_stores/default/SDK_CRAWLER_STATISTICS_0.json`中记录
      - "requestsFinished": 4136, // 记录完成多少个请求（即遍历了多少个网页）
      - "requestsFailed": 14, // 记录失败数
   - `storage/request_queues/default`目录下记录爬到并且加入队列的链接，其中每个文件的`json`字段（字符串化的JSON）里的`userData`记录了从首页到当前页面的路径等轨迹元数据
 - pages/screenshots/axtrees（快照数据）: 记录遍历的网页的html, 截图和axtree
 - path（轨迹数据）: 记录每个网页父节点到子节点的边(`childs`目录)，以及`processedUrlToPaths.json`记录到达某个网页地址的路径/最短路径。
 # 4. 分析轨迹数据
 停止数据飞轮以后，使用 `analysis_v18.js` 可以对轨迹数据进行进一步的处理，按照最短路径长度分组，保存在path目录下。
 # 5. 可视化展示
 - vscode安装Live Server插件。右键点击`index.html`，选择"Open with Live Server"。在浏览器中查看轨迹数据。
 - 归档的产出物在`archive_artifact_v18.tar.gz`中，可以解压缩，然后测试可视化展示。
 # 6. 其他
 在`misc`目录下有对轨迹进行总结的测试，以及使用模型和总结后的任务，评估模型每一步正确率 temp_analysis。
--- a/analysis_v18.js
+++ b/analysis_v18.js
@ -0,0 +1,295 @@
 import { readFileSync, writeFileSync, existsSync } from 'fs';
 import { join } from 'path';
 // Load the crawler's URL -> paths map (each path is a list of request ids)
 const data = JSON.parse(readFileSync('path/processedUrlToPaths.json', 'utf8'));
 // Print how many URLs there are to analyze
 console.log(`总共有 ${Object.keys(data).length} 个 URL 需要分析`);
 // Total number of recorded paths, summed over all URLs
 const totalPaths = Object.values(data).reduce((sum, paths) => sum + paths.length, 0);
 console.log(`所有 URL 的路径总数为: ${totalPaths}`);
 /**
  * Build the full click-chain metadata for one crawled request.
  *
  * Reads storage/request_queues/default/<requestId>.json, parses the request
  * serialized in its `json` field, and extends the recorded parent chains with
  * this request's own id / url / scroll so the chains end at the request itself.
  *
  * @param {string} requestId - Request id, which is also the queue file's base name.
  * @returns {object|null} Object with `request_id` and the `chain*` arrays, or null
  *   when the queue file cannot be read or parsed (the error is logged).
  */
 function getRequestMetadata(requestId) {
    try {
        const requestPath = join('storage', 'request_queues', 'default', `${requestId}.json`);
        const requestData = JSON.parse(readFileSync(requestPath, 'utf8'));
        // The `json` field holds the serialized request as a string
        const jsonData = JSON.parse(requestData.json);
        const userData = jsonData.userData || {};
        // The seed request is enqueued with only parentChain / parentChainIDs, so every
        // other chain falls back to empty instead of throwing on a missing array.
        const chainIDs = [...(userData.parentChainIDs ?? []), requestId];
        const chainTexts = userData.parentChainTexts ?? [];
        return {
            request_id: requestId,
            // Parent ids followed by this request's id
            chainIDs,
            chainTexts,
            chainAxTreeID: getChainAxTreeIDs(chainIDs, chainTexts),
            // Parent urls followed by this request's url
            chainUrls: [...(userData.parentChain ?? []), requestData.url],
            chainChildNum: userData.parentChainChildNum ?? [],
            chainPageBoundingBoxes: userData.parentChainPageBoundingBoxes ?? [],
            chainViewportBoundingBoxes: userData.parentChainViewportBoundingBoxes ?? [],
            // Parent scrolls followed by the scroll stored with this request's element
            // (null when the request has no elementPosition, i.e. the seed request)
            chainScrolls: [...(userData.parentChainScrolls ?? []), userData.elementPosition?.scroll ?? null]
        };
    } catch (error) {
        console.error(`无法读取请求 ${requestId} 的元信息:`, error.message);
        return null;
    }
 }
 /**
  * For each step of a click chain, find the axtree node id(s) of the clicked element.
  *
  * Step i looks in axtrees/<chainIDs[i]>.txt for lines that contain the quoted
  * link text `'<chainTexts[i]>'` and the word `clickable`, and collects the
  * number inside the first `[n]` marker of each such line.
  *
  * @param {Array<string>} chainIDs - Request ids along the chain (may be longer than chainTexts).
  * @param {Array<string>} chainTexts - Link text clicked at each step.
  * @returns {Array<string|null>} One entry per step that has a chainTexts slot: the matching
  *   ids joined with ',', or null when the step has no id/text, no axtree file, or no match.
  */
 function getChainAxTreeIDs(chainIDs, chainTexts) {
    const ids = chainIDs ?? [];
    const texts = chainTexts ?? [];
    const axTreeIDs = [];
    for (let i = 0; i < ids.length; i++) {
        const id = ids[i];
        const text = texts[i];
        if (!id || !text) {
            // Keep the result index-aligned with the other chain arrays: a step whose
            // id/text is empty still gets a slot. Ids beyond the end of chainTexts
            // (no text was recorded for them) get no slot.
            if (i < texts.length) axTreeIDs.push(null);
            continue;
        }
        try {
            const axTreePath = join('axtrees', `${id}.txt`);
            if (!existsSync(axTreePath)) {
                axTreeIDs.push(null);
                continue;
            }
            // The axtree text quotes node names with single quotes
            const matchText = `'${text}'`;
            const matchedAxTreeIDs = [];
            for (const line of readFileSync(axTreePath, 'utf8').split('\n')) {
                if (!line.includes(matchText) || !line.includes('clickable')) continue;
                // Extract the numeric id from the `[n]` marker
                const match = line.match(/\[\s*(\d+)\s*\]/);
                if (match) matchedAxTreeIDs.push(Number.parseInt(match[1], 10));
            }
            // Several matching nodes are reported as one comma-separated string
            axTreeIDs.push(matchedAxTreeIDs.length > 0 ? matchedAxTreeIDs.join(',') : null);
        } catch (error) {
            console.error(`无法读取 axtree 文件 ${id}:`, error.message);
            axTreeIDs.push(null);
        }
    }
    return axTreeIDs;
 }
 // Analysis results, keyed by URL
 const analysis = {};
 // Shortest path length of each analyzed URL (one entry per URL)
 const shortestPathLengths = [];
 let totalShortestPaths = 0; // Running total of shortest paths over all URLs
 // Memo: request id -> whether its HTML snapshot was found on disk
 const validRequestCache = new Map();
 /**
  * Tell whether a request id is usable, i.e. pages/<requestId>.html exists.
  * The answer is computed once per id and then served from the cache.
  *
  * @param {string} requestId
  * @returns {boolean}
  */
 function isValidRequest(requestId) {
    let snapshotExists = validRequestCache.get(requestId);
    if (snapshotExists === undefined) {
        // First time this id is seen: look on disk and remember the answer
        snapshotExists = existsSync(join('pages', `${requestId}.html`));
        validRequestCache.set(requestId, snapshotExists);
    }
    return snapshotExists;
 }
 /**
  * A path is valid when none of its request ids is invalid.
  *
  * @param {Array<string>} path - Request ids from the start page to the target page.
  * @returns {boolean} true for an empty path or when every id passes isValidRequest.
  */
 function isValidPath(path) {
    const hasInvalidRequest = path.some(requestId => !isValidRequest(requestId));
    return !hasInvalidRequest;
 }
 // Analyze every URL: keep only the paths whose pages were all captured, then
 // record the distinct shortest ones together with the metadata of their last request.
 for (const [url, paths] of Object.entries(data)) {
    const validPaths = paths.filter(path => isValidPath(path));
    // URLs without any valid path are left out of the analysis
    if (validPaths.length === 0) {
        continue;
    }
    // Pass 1: length of the shortest valid path
    let shortestPathLength = Infinity;
    for (const path of validPaths) {
        if (path.length < shortestPathLength) {
            shortestPathLength = path.length;
        }
    }
    // Pass 2: distinct shortest paths in first-seen order. Metadata is loaded only
    // for these, not for candidates that a shorter path would later replace.
    const seenPaths = new Set();
    const shortestPaths = [];
    const shortestPathsMeta = [];
    for (const path of validPaths) {
        if (path.length !== shortestPathLength) continue;
        const pathKey = JSON.stringify(path);
        if (seenPaths.has(pathKey)) continue;
        seenPaths.add(pathKey);
        shortestPaths.push(path);
        shortestPathsMeta.push(getRequestMetadata(path[path.length - 1]));
    }
    analysis[url] = {
        totalPaths: paths.length,
        shortestPathLength,
        shortestPaths,
        shortestPathsMeta,
        // Number of shortest paths after de-duplication
        shortestPathCount: shortestPaths.length
    };
    // Statistics
    shortestPathLengths.push(shortestPathLength);
    totalShortestPaths += shortestPaths.length;
 }
 // Print summary statistics
 console.log(`有 ${Object.values(analysis).filter(a => a.shortestPathCount > 1).length} 个 URL 具有多条最短路径`);
 console.log(`所有 URL 的最短路径总数为: ${totalShortestPaths}`);
 // Group the per-URL results by shortest path length
 const analysisByLength = {};
 // (the callback's `data` parameter shadows the module-level `data`)
 Object.entries(analysis).forEach(([url, data]) => {
    const length = data.shortestPathLength;
    if (!analysisByLength[length]) {
        analysisByLength[length] = {};
    }
    analysisByLength[length][url] = data;
 });
 // Write the complete analysis to one file
 writeFileSync('path/processed.json', JSON.stringify(analysis, null, 2));
 // Write one additional file per path length
 Object.entries(analysisByLength).forEach(([length, data]) => {
    const filename = `path/processed_${length}.json`;
    writeFileSync(filename, JSON.stringify(data, null, 2));
    console.log(`路径长度为 ${length} 的URL数量: ${Object.keys(data).length}`);
 });
 // Distribution of shortest path lengths: length -> number of URLs
 const lengthDistribution = {};
 shortestPathLengths.forEach(length => {
    lengthDistribution[length] = (lengthDistribution[length] || 0) + 1;
 });
 // Cumulative distribution: length -> fraction of URLs whose shortest path is at most that long
 const totalUrls = shortestPathLengths.length;
 const cumulativeDistribution = {};
 let cumulative = 0;
 Object.keys(lengthDistribution)
    // Keys are strings, so sort them numerically
    .sort((a, b) => Number(a) - Number(b))
    .forEach(length => {
        cumulative += lengthDistribution[length];
        cumulativeDistribution[length] = cumulative / totalUrls;
    });
 // Number of URLs that have more than one shortest path
 const multipleShortestPathsCount = Object.values(analysis).filter(a => a.shortestPathCount > 1).length;
 // Write the distribution analysis to its own file
 const distributionAnalysis = {
    pathLengthDistribution: lengthDistribution,
    cumulativeDistribution: cumulativeDistribution,
    totalUrlsAnalyzed: totalUrls,
    multipleShortestPathsUrls: multipleShortestPathsCount,
    statistics: {
        minLength: Math.min(...shortestPathLengths),
        maxLength: Math.max(...shortestPathLengths),
        averageLength: shortestPathLengths.reduce((a, b) => a + b, 0) / totalUrls
    }
 };
 writeFileSync('path/distribution_analysis.json', JSON.stringify(distributionAnalysis, null, 2));
 // Print the average shortest path length
 console.log(`所有 URL 的平均最短路径长度为: ${distributionAnalysis.statistics.averageLength.toFixed(2)}`);
 // Build a standalone HTML page that plots the length distribution as a bar chart;
 // the distribution is embedded into the page's script as a JSON literal.
 const htmlContent = `
 <!DOCTYPE html>
 <html>
 <head>
    <title>Shortest Path Length Distribution</title>
    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
 </head>
 <body>
    <div id="plot"></div>
    <script>
        const distribution = ${JSON.stringify(lengthDistribution)};
        const x = Object.keys(distribution);
        const y = Object.values(distribution);
        const trace = {
            x: x,
            y: y,
            type: 'bar',
            name: 'Path Length Distribution'
        };
        const layout = {
            title: 'URL Shortest Path Length Distribution',
            xaxis: {
                title: 'Shortest Path Length',
                tickmode: 'linear'
            },
            yaxis: {
                title: 'Number of URLs'
            }
        };
        Plotly.newPlot('plot', [trace], layout);
    </script>
 </body>
 </html>
 `;
 // Written with a forward-slash (Linux-style) path
 writeFileSync('path/distribution.html', htmlContent);
 // Print the number of URLs that have more than one shortest path
 console.log(`有 ${multipleShortestPathsCount} 个 URL 具有多条最短路径`);
--- a/crawl_grafana_v18.js
+++ b/crawl_grafana_v18.js
@ -0,0 +1,589 @@
 import { PlaywrightCrawler, RequestQueue } from 'crawlee';
 import fs from 'fs';
 import path from 'path';
 import { fileURLToPath } from 'url';
 import { v4 as uuidv4 } from 'uuid';
 import { createWriteStream } from 'fs';
 import { Log } from 'crawlee';
 // Resolve __filename / __dirname, which ES modules do not provide
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
 // The log file sits next to this script: same base name, `.log` extension.
 // Only the real extension is swapped, so a `.js` elsewhere in the path is left alone
 // and a script without a `.js` suffix can never end up as its own log target.
 const logFilePath = path.join(__dirname, `${path.basename(__filename, path.extname(__filename))}.log`);
 // Opened in append mode so earlier runs are kept
 const logFile = createWriteStream(logFilePath, { flags: 'a' });
 /**
  * Append one timestamped line to the log file.
  *
  * @param {string} level - Label written in front of the message (e.g. 'INFO').
  * @param {*} message - Message, interpolated into the line as a string.
  */
 const writeToLog = (level, message) => {
    const logMessage = `[${new Date().toISOString()}] ${level}: ${message}\n`;
    logFile.write(logMessage);
 };
 // Wrap the Log.* functions so each message is appended to the log file
 // before being handed to the original function.
 // NOTE(review): the originals are read from, and the wrappers assigned on, the imported
 // `Log` binding itself; whether Crawlee routes its logging through these properties is
 // not visible here — confirm.
 // NOTE(review): the saved originals are later called as plain functions (no `this`) —
 // confirm that they do not depend on it.
 const originalLogFunctions = {
    debug: Log.debug,
    info: Log.info,
    warning: Log.warning,
    warn: Log.warn,
    error: Log.error
 };
 Log.debug = function(msg, options) {
    writeToLog('DEBUG', msg);
    return originalLogFunctions.debug(msg, options);
 };
 Log.info = function(msg, options) {
    writeToLog('INFO', msg);
    return originalLogFunctions.info(msg, options);
 };
 // `warning` and `warn` share one wrapper, which forwards to the original `warning`
 Log.warning = Log.warn = function(msg, options) {
    writeToLog('WARN', msg);
    return originalLogFunctions.warning(msg, options);
 };
 Log.error = function(msg, options) {
    writeToLog('ERROR', msg);
    return originalLogFunctions.error(msg, options);
 };
 // Console output is mirrored into the log file as well
 const originalConsoleLog = console.log;
 const originalConsoleWarn = console.warn;
 const originalConsoleError = console.error;
 /**
  * Render console arguments as one space-separated log line.
  *
  * Error objects are rendered with their stack (JSON.stringify would give `{}`), and
  * objects that cannot be serialized (e.g. circular ones) fall back to String(), so
  * logging itself never throws.
  *
  * @param {Array<*>} args - Arguments passed to the console method.
  * @returns {string}
  */
 function formatConsoleArgs(args) {
    return args.map(arg => {
        if (arg instanceof Error) {
            return arg.stack || `${arg.name}: ${arg.message}`;
        }
        if (typeof arg === 'object') {
            try {
                return JSON.stringify(arg);
            } catch (err) {
                return String(arg);
            }
        }
        return arg;
    }).join(' ');
 }
 /**
  * Create a console method replacement that writes a timestamped line to the
  * log file and then forwards the call to the original console method.
  *
  * @param {string} level - Label written into the log line.
  * @param {Function} original - The console method being wrapped.
  * @returns {Function}
  */
 function mirrorConsoleMethod(level, original) {
    return function(...args) {
        logFile.write(`[${new Date().toISOString()}] ${level}: ${formatConsoleArgs(args)}\n`);
        original.apply(console, args);
    };
 }
 console.log = mirrorConsoleMethod('INFO', originalConsoleLog);
 console.warn = mirrorConsoleMethod('WARN', originalConsoleWarn);
 console.error = mirrorConsoleMethod('ERROR', originalConsoleError);
 // Close the log file stream when the process exits
 process.on('exit', () => {
    logFile.end();
 });
 // Global handler: log uncaught exceptions so that they do not terminate the program
 process.on('uncaughtException', (err) => {
    console.error('未捕获的异常:', err);
 });
 // On SIGINT (Ctrl+C): write processedUrlToPaths to disk, close the log file, then exit
 process.on('SIGINT', () => {
    console.log('\n检测到 Ctrl+C，正在保存数据并退出程序...');
    // Save the final processedUrlToPaths
    // (processedUrlToPaths and pathDir are declared further down in this module)
    const mapObject = Object.fromEntries(processedUrlToPaths);
    fs.writeFileSync(
        path.join(pathDir, 'processedUrlToPaths.json'), 
        JSON.stringify(mapObject, null, 2)
    );
    console.log('数据已保存，程序退出！');
    logFile.end(); // Make sure the log file is closed
    process.exit(0);  // Exit with a success status
 });
 // Output locations, all placed next to this script
 const pagesDir = path.join(__dirname, 'pages');
 const axtreesDir = path.join(__dirname, 'axtrees');
 const screenshotsDir = path.join(__dirname, 'screenshots');
 const storageDir = path.join(__dirname, 'storage');
 const pathDir = path.join(__dirname, 'path');
 const childsDir = path.join(pathDir, 'childs'); // child-link records live under path/
 // Start from a clean slate: remove whatever a previous run left behind, then recreate
 // every directory empty. pathDir is listed before childsDir so the parent is handled first.
 {
    const outputDirs = [pagesDir, screenshotsDir, storageDir, pathDir, childsDir, axtreesDir];
    for (const dir of outputDirs) {
        if (fs.existsSync(dir)) {
            fs.rmSync(dir, { recursive: true });
        }
    }
    for (const dir of outputDirs) {
        fs.mkdirSync(dir);
    }
 }
 console.log("启动爬虫...");
 // Final URL -> list of paths that reached it; each path is the chain of request ids leading to the page
 const processedUrlToPaths = new Map();
 // Link URL -> parent-chain length recorded when that URL was enqueued (used to skip paths that are not shorter)
 const processedUrlToParentChainLength = new Map();
 // Final URLs that have already been explored, used for de-duplication
 const urlExplored = new Set();
 // Number of requests whose path has been recorded so far (a plain counter)
 let processedRequestsCount = 0;
 (async () => {
    // 打开请求队列，建议在首次运行前清空 storage/request_queues 文件夹
    const requestQueue = await RequestQueue.open();
    // 使用 UUID 作为初始请求的 uniqueKey 和 id
    await requestQueue.addRequest({ 
        // url : "https://play.grafana.org/a/grafana-app-observability-app",
        // uniqueKey : "https://play.grafana.org/a/grafana-app-observability-app_0",
        url: 'https://play.grafana.org',
        uniqueKey: 'https://play.grafana.org_0',
        userData: { parentChain: [], parentChainIDs: [] }
    });
    const crawler = new PlaywrightCrawler({
        requestQueue,
        keepAlive: true,
        navigationTimeoutSecs: 120,
        requestHandlerTimeoutSecs: 360,
        async handlePageFunction({ page, request, enqueueLinks, log }) {
            // 重写 log 对象的方法
            const originalLog = log;
            log = {
                ...originalLog,
                info: (message) => {
                    writeToLog('INFO', message);
                    originalLog.info(message);
                },
                debug: (message) => {
                    writeToLog('DEBUG', message);
                    originalLog.debug(message);
                },
                warning: (message) => {
                    writeToLog('WARN', message);
                    originalLog.warning(message);
                },
                warn: (message) => {  // 添加 warn 方法
                    writeToLog('WARN', message);
                    originalLog.warn(message);
                },
                error: (message) => {
                    writeToLog('ERROR', message);
                    originalLog.error(message);
                }
            };
            // 获取页面最终重定向后的 URL（去除参数）
            const finalUrl = page.url().split('?')[0];
            // 如果已经探索过该页面，则直接返回
            if (urlExplored.has(finalUrl)) {
                log.info(`页面 ${finalUrl} 已探索，跳过当前请求`);
                return;
            }
            // 获取当前页面的完整路径
            const fullChain = [...(request.userData.parentChainIDs || []), request.id];
            // 获取已经存入processedUrlToPaths的页面路径
            const processedPath = processedUrlToPaths.get(finalUrl);
            if (processedPath) {
                processedPath.push(fullChain);
                processedUrlToPaths.set(finalUrl, processedPath);
                log.info(`Final URL ${finalUrl} 记录新的可达路径：${fullChain}。`);
            } else {
                processedUrlToPaths.set(finalUrl, [fullChain]);
                log.info(`Final URL ${finalUrl} 记录第一个可达路径：${fullChain}。`);
            }
            processedRequestsCount++;
            // 每处理10个请求保存processedUrlToPaths
            if (processedRequestsCount % 10 === 0) {
                const mapObject = Object.fromEntries(processedUrlToPaths);
                fs.writeFileSync(path.join(pathDir, 'processedUrlToPaths.json'), JSON.stringify(mapObject, null, 2));
                log.info(`已保存 processedUrlToPaths.json`);
            }
            // 设置页面视口大小
            await page.setViewportSize({ width: 2560, height: 1440 });
            // await page.setViewportSize({ width: 1280, height: 720 });
            // 增加超时时间到 120 秒
            await page.goto(request.url, { 
                timeout: 120000,  // 增加到 120 秒
                waitUntil: 'domcontentloaded'  // 改为只等待 DOM 加载完成，不等待所有资源
            });
            console.log('页面加载完成');
            // 等待页面稳定
            try {
                await page.waitForLoadState('networkidle', { timeout: 30000 });
            } catch (e) {
                console.log('网络未完全空闲，但继续执行：', e.message);
            }
            // 在等待 networkidle 之前，先展开所有可折叠内容
            console.log('\n开始展开导航项...');
            const clickedButtons = new Set();
            /**
             * Click every expand/open button on the page that has not been clicked yet.
             * Buttons are de-duplicated across calls by their aria-label (kept in clickedButtons);
             * a button whose click fails is not remembered.
             *
             * @returns {Promise<boolean>} true when at least one new button was clicked.
             */
            const expandButtons = async () => {
                console.log('开始寻找可展开按钮...');
                const candidates = await page.$$('button[aria-label*="Expand"], button[aria-label*="Open"], button[aria-expanded="false"]');
                console.log(`找到 ${candidates.length} 个折叠按钮`);
                let clickedAny = false;
                for (const candidate of candidates) {
                    try {
                        // Bring the button into view inside the page before touching it
                        await page.evaluate(element => {
                            element.scrollIntoView({behavior: 'smooth', block: 'center'});
                        }, candidate);
                        const label = await candidate.getAttribute('aria-label');
                        if (clickedButtons.has(label)) {
                            continue;
                        }
                        console.log(`点击新按钮: ${label}`);
                        await candidate.click();
                        clickedButtons.add(label);
                        clickedAny = true;
                        await page.waitForTimeout(200);
                    } catch (e) {
                        console.log(`点击失败: ${e.message}`);
                    }
                }
                return clickedAny;
            };
            let iteration = 1;
            while (true) {
                console.log(`\n第 ${iteration} 次查找...`);
                const foundNewButtons = await expandButtons();
                if (!foundNewButtons) {
                    console.log('没有发现新的可展开按钮，结束查找');
                    break;
                }
                console.log(`已点击按钮数量: ${clickedButtons.size}`);
                await page.waitForTimeout(500);
                iteration++;
            }
            // 获取所有 <a> 标签的元素句柄
            const anchorHandles = await page.$$('a');
            console.log(`当前网页${finalUrl}找到 ${anchorHandles.length} 个链接`);
            // 记录子元素编号
            let childNum = 0;
            // 创建记录所有子链接和加入队列的子链接的数组
            const allChildLinks = [];
            const queuedChildLinks = [];
            for (const anchorHandle of anchorHandles) {
                childNum++;
                // 先获取 <a> 标签的 href 与文本内容
                const anchorData = await page.evaluate(el => {
                    return {
                        url: el.href,
                        text: el.innerText.trim()
                    };
                }, anchorHandle);
                // 获取元素边界框信息
                let rect = null;
                let scroll = null;
                let pageBoundingBox = null;
                try {
                    // 尝试获取元素的边界框信息
                    rect = await anchorHandle.boundingBox();
                    if (rect) {
                        scroll = await page.evaluate(() => ({ x: window.scrollX, y: window.scrollY }));
                        pageBoundingBox = {
                            x: rect.x + scroll.x,
                            y: rect.y + scroll.y,
                            width: rect.width,
                            height: rect.height,
                        };
                    }
                } catch (err) {
                    console.error(`获取元素边界框失败: ${err.message}`);
                }
                // 判断链接是否在目标域内
                const isInLoop = anchorData.url.startsWith('https://play.grafana.org');
                // 记录所有子链接，包含完整信息
                const childLink = {
                    childNum,
                    url: anchorData.url,
                    text: anchorData.text,
                    isInLoop,
                    isInQueue: false, // 默认未加入队列，后续会更新
                    viewportBoundingBox: rect,
                    pageBoundingBox: pageBoundingBox,
                    scroll: scroll
                };
                allChildLinks.push(childLink);
                // 如果链接不属于目标域，则直接跳过（即为外链）
                if (!isInLoop) continue;
                log.info(`处理链接：${anchorData.text}，childNum：${childNum}`);
                // 使用更可靠的滚动方法
                await page.evaluate(element => {
                    // 使用JavaScript的scrollIntoView，更直接且兼容性更好
                    element.scrollIntoView({behavior: 'smooth', block: 'center'});
                }, anchorHandle);
                await page.waitForTimeout(500); // 给滚动和渲染更多时间
                // 获取元素在窗口内的 bounding box
                rect = await anchorHandle.boundingBox();
                if (!rect) continue;
                // 获取当前窗口滚动偏移，用于计算页面内位置
                scroll = await page.evaluate(() => ({ x: window.scrollX, y: window.scrollY }));
                pageBoundingBox = {
                    x: rect.x + scroll.x,
                    y: rect.y + scroll.y,
                    width: rect.width,
                    height: rect.height,
                };
                // 构造新的 userData，包含父链与元素详细信息
                const newUserData = {
                    parentChainChildNum: [...(request.userData.parentChainChildNum || []), childNum],
                    parentChain: [...(request.userData.parentChain || []), request.url],
                    parentChainIDs: [...(request.userData.parentChainIDs || []), request.id],
                    parentChainTexts: [...(request.userData.parentChainTexts || []), anchorData.text],
                    parentChainScrolls: [...(request.userData.parentChainScrolls || []), scroll],
                    parentChainViewportBoundingBoxes: [...(request.userData.parentChainViewportBoundingBoxes || []), rect],
                    parentChainPageBoundingBoxes: [...(request.userData.parentChainPageBoundingBoxes || []), pageBoundingBox],
                    elementPosition: {
                        viewportBoundingBox: rect,
                        pageBoundingBox: pageBoundingBox,
                        scroll: scroll,
                        text: anchorData.text,
                        childNum: childNum
                    }
                };
                // 增加自定义url去重，通过全局的map记录url与parentchain长度的映射，如果当前路径长度大于等于已记录的url的parentchain长度，则跳过该url
                const urlToParentChainLength = processedUrlToParentChainLength.get(anchorData.url);
                if (urlToParentChainLength && urlToParentChainLength <= newUserData.parentChain.length) {
                    log.info(`url：${anchorData.url} 历史发现路径最短长度为${urlToParentChainLength}，当前路径长度为${newUserData.parentChain.length}，跳过当前请求url`);
                    continue;
                }
                // 记录当前url的parentchain长度，记录最小值
                processedUrlToParentChainLength.set(anchorData.url, newUserData.parentChain.length);
                const uniqueKey = `${anchorData.url}_${newUserData.parentChain.length}`;
                log.info(`请求加入队列：${anchorData.url}，uniqueKey：${uniqueKey}，childNum：${childNum}`);
                try {
                    await requestQueue.addRequest({
                        url: anchorData.url,
                        uniqueKey: uniqueKey,
                        userData: newUserData
                    });
                    // 更新原始链接的队列状态
                    childLink.isInQueue = true;
                    // 记录成功加入队列的子链接，包含完整信息
                    queuedChildLinks.push({
                        childNum,
                        url: anchorData.url,
                        text: anchorData.text,
                        uniqueKey,
                        isInLoop,
                        isInQueue: true,
                        viewportBoundingBox: rect,
                        pageBoundingBox: pageBoundingBox,
                        scroll: scroll
                    });
                } catch (err) {
                    log.info(`请求已存在或添加失败：${anchorData.url}`);
                }
                // 截图保存当前窗口
                const screenshotPath = path.join(screenshotsDir, `${request.id}_${childNum}.png`);
                await page.screenshot({ path: screenshotPath, fullPage: false });
                log.info(`已保存第${childNum}个子元素截图：${anchorData.url} -> ${screenshotPath}`);
            }
            // 保存子链接记录到JSON文件
            const childLinksData = {
                requestId: request.id,
                url: finalUrl,
                totalAnchors: anchorHandles.length,  // 添加记录总的<a>标签数量
                allChildLinks,
                queuedChildLinks,
                totalFound: allChildLinks.length,
                totalQueued: queuedChildLinks.length
            };
            const childLinksPath = path.join(childsDir, `${request.id}.json`);
            fs.writeFileSync(childLinksPath, JSON.stringify(childLinksData, null, 2));
            log.info(`已保存子链接记录：${finalUrl} -> ${childLinksPath}`);
            // 将当前页面标记为已探索
            urlExplored.add(finalUrl);
            // 保存当前页面 HTML，文件名使用 request.id
            const content = await page.content();
            const htmlFilePath = path.join(pagesDir, request.id + '.html');
            fs.writeFileSync(htmlFilePath, content);
            log.info(`已保存 HTML：${finalUrl} -> ${htmlFilePath}`);
            // 获取 AXTree
            const axTree = await page.accessibility.snapshot({ interestingOnly: false });
            // Next id to hand out; ids are assigned in visit order starting at 1
            let idCounter = 1;
            // id -> selector-like string describing the node
            const idToSelector = {};
            // node -> parent for every visited node, skipped ones included
            const nodeParents = new Map();
            // Lines of the text rendering of the AXTree
            let axtreeText = [];
            /**
             * Depth-first walk over an accessibility node that appends one line per kept
             * node to axtreeText and records a selector string for it in idToSelector.
             *
             * InlineTextBox nodes, and none/generic nodes without name, focus state or
             * expanded state, are skipped: they get no id and no line, and their children
             * are rendered at the skipped node's depth.
             *
             * @param {object} node - Accessibility node (role, name, children, state flags).
             * @param {number} [depth=0] - Indentation level; each level is 4 spaces.
             * @param {object|null} [parent=null] - Node whose children list contains `node`.
             */
            function traverse(node, depth = 0, parent = null) {
                nodeParents.set(node, parent);
                const { role, name } = node;
                const isBareContainer = (role === 'none' || role === 'generic')
                    && !name
                    && !node.focusable
                    && !node.focused
                    && node.expanded === undefined;
                if (role === 'InlineTextBox' || isBareContainer) {
                    (node.children ?? []).forEach(child => traverse(child, depth, node));
                    return;
                }
                const currentId = idCounter;
                idCounter += 1;

                // Selector: optional "parent >>" prefix, role, attribute filters, sibling position
                const selectorParts = [];
                if (parent && parent.role !== 'WebArea') {
                    const parentName = parent.name ? `[name="${parent.name}"]` : '';
                    selectorParts.push(`role=${parent.role}${parentName} >>`);
                }
                selectorParts.push(`role=${role}`);
                if (name) selectorParts.push(`[name="${name}"]`);
                if (node.selected) selectorParts.push('[selected=true]');
                if (node.checked !== undefined) selectorParts.push(`[checked=${node.checked}]`);
                if (node.pressed !== undefined) selectorParts.push(`[pressed=${node.pressed}]`);
                if (parent?.children) {
                    const position = parent.children.indexOf(node);
                    if (position !== -1) selectorParts.push(`:nth-match(${position + 1})`);
                }
                idToSelector[currentId] = selectorParts.join(' ');

                // Properties shown in parentheses after the node, in this fixed order.
                // A node counts as clickable when its role is in the list or it is focusable.
                const clickableRoles = ['button', 'link', 'menuitem', 'tab', 'checkbox', 'radio', 'switch', 'option'];
                const flags = [
                    [node.focusable, 'focusable'],
                    [node.focused, 'focused'],
                    [node.expanded !== undefined, `expanded=${node.expanded}`],
                    [node.selected, 'selected'],
                    [node.checked !== undefined, `checked=${node.checked}`],
                    [node.disabled, 'disabled'],
                    [node.required, 'required'],
                    [node.pressed !== undefined, `pressed=${node.pressed}`],
                    [clickableRoles.includes(role) || node.focusable, 'clickable']
                ];
                const props = flags.filter(([enabled]) => enabled).map(([, label]) => label);
                const suffix = props.length > 0 ? ` (${props.join(', ')})` : '';
                axtreeText.push(`${' '.repeat(depth * 4)}[${currentId}] ${role} '${name || ''}'${suffix}`);

                (node.children ?? []).forEach(child => traverse(child, depth + 1, node));
            }
            // 添加根节点信息
            let rootProps = [];
            if (axTree.focusable) rootProps.push('focusable=True');
            if (axTree.focused) rootProps.push('focused');
            axtreeText.push(`Root${axTree.role ? axTree.role : 'WebArea'} '${axTree.name || ''}'${rootProps.length > 0 ? ', ' + rootProps.join(', ') : ''}`);
            // 遍历 AXTree
            if (axTree.children?.length > 0) {
                for (const child of axTree.children) {
                    traverse(child, 1, axTree);
                }
            }
            // 保存axtree到文件
            const axtreePath = path.join(axtreesDir, `${request.id}.txt`);
            fs.writeFileSync(axtreePath, axtreeText.join('\n'));
            log.info(`已保存 axtree：${finalUrl} -> ${axtreePath}`);
            // 保存idToSelector到文件
            const idToSelectorPath = path.join(axtreesDir, `${request.id}_idToSelector.json`);
            fs.writeFileSync(idToSelectorPath, JSON.stringify(idToSelector, null, 2));
            log.info(`已保存 idToSelector：${finalUrl} -> ${idToSelectorPath}`);
            // 保存当前页面截图，文件名使用 request.id
            const fullPageScreenshotPath = path.join(screenshotsDir, `${request.id}_full.png`);
            await page.screenshot({ path: fullPageScreenshotPath, fullPage: true });
            log.info(`已保存全屏截图：${finalUrl} -> ${fullPageScreenshotPath}`);
            console.log(`记录已探索页面到全局集合：${finalUrl}`);
        },
        // Failure callback passed to the crawler: only reports the URL of the failed request.
        async handleFailedRequestFunction({ request }) {
            console.error(`请求 ${request.url} 处理失败。`);
        },
    });
    try {
        await crawler.run();
    } catch (err) {
        // Report the failure first so it is not lost if the save below throws as well
        console.error("爬虫运行时出错:", err);
        // Persist the paths collected so far. The handler-scoped `log` does not exist
        // in this scope, so the confirmation goes through console instead.
        const mapObject = Object.fromEntries(processedUrlToPaths);
        fs.writeFileSync(path.join(pathDir, 'processedUrlToPaths.json'), JSON.stringify(mapObject, null, 2));
        console.log(`已保存 processedUrlToPaths.json`);
    }
    console.log("爬虫运行结束！");
 })();
--- a/doc/webarena.md
+++ b/doc/webarena.md
@ -0,0 +1,25 @@
 # WebArena实例服务
 ## 1. 背景
 操作智能体项目在数据采集、模型训练、模型推理评估等环节，需要频繁地与WebArena实例服务进行交互。交互过程会修改WebArena实例的状态，因此需要对WebArena实例服务进行实例化。
 通过统一的管理服务实现WebArena实例的自动创建、销毁、状态查询等操作。
 ## 2. 架构
 服务包含以下几个部分：
 - 控制台：提供网页界面，可以实现服务器管理、WebArena实例的创建、销毁、状态查询等操作。
 - 物理服务器：目前已有的g1-g14服务器，已经安装好NixOS和docker。需要将WebArena镜像安装到各服务器上。每台服务器上运行一个agent。
 - 实例Agent：部署在物理服务器上，负责WebArena实例的创建、销毁、可用状态查询等操作。
 ## 3. 使用流程
 ### 3.1 创建WebArena实例
 ### 3.2 销毁WebArena实例
 ### 3.3 查询WebArena实例状态
 ### 3.4 查询WebArena实例日志
--- a/index.html
+++ b/index.html
@ -0,0 +1,441 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>路径可视化</title>
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/styles/default.min.css">
    <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/highlight.min.js"></script>
    <script>
        document.addEventListener('DOMContentLoaded', () => {
            if (window.hljs) {
                hljs.configure({languages: ['json']});
            }
        });
    </script>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
        }
        .search-container {
            display: flex;
            gap: 10px;
            margin-bottom: 20px;
        }
        #urlSearch {
            flex: 1;
            padding: 10px;
            font-size: 16px;
        }
        #randomUrl {
            padding: 5px 10px;
            background-color: #1976d2;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
        }
        #randomUrl:hover {
            background-color: #1565c0;
        }
        .path-container {
            margin-bottom: 40px;
        }
        .screenshot-container {
            position: relative;
            display: inline-block;
            margin: 10px;
        }
        .bounding-box {
            position: absolute;
            border: 2px solid red;
            pointer-events: none;
        }
        .navigation {
            text-align: center;
            margin-top: 20px;
        }
        .navigation button {
            padding: 10px 20px;
            margin: 0 10px;
            font-size: 16px;
        }
        .path-info {
            margin: 10px 0;
            padding: 5px;
            background-color: #e8e8e8;
            border-radius: 4px;
        }
        .current-url {
            font-weight: bold;
            margin-bottom: 20px;
        }
        .screenshot-container img {
            max-width: 80%;
            height: auto;
            cursor: pointer;
        }
        .modal {
            display: none;
            position: fixed;
            top: 0;
            left: 0;
            width: 100%;
            height: 100%;
            background-color: rgba(0, 0, 0, 0.9);
            z-index: 1000;
            overflow: auto;
        }
        .modal-content {
            position: relative;
            margin: auto;
            padding: 20px;
            text-align: center;
        }
        .modal-image-container {
            position: relative;
            display: inline-block;
            cursor: move;
        }
        .modal img {
            max-width: 90vw;
            max-height: 90vh;
            transform-origin: center;
            user-select: none;
        }
        .zoom-controls {
            position: fixed;
            bottom: 20px;
            left: 50%;
            transform: translateX(-50%);
            background: rgba(255, 255, 255, 0.8);
            padding: 10px;
            border-radius: 5px;
        }
        .zoom-controls button {
            margin: 0 5px;
            padding: 5px 10px;
        }
        .length-selector {
            position: absolute;
            top: 20px;
            right: 20px;
            z-index: 100;
        }
        .length-selector select {
            padding: 5px;
            font-size: 14px;
            border-radius: 4px;
        }
        .step-container {
            margin: 20px 0;
            padding: 15px;
            background-color: #f5f5f5;
            border-radius: 8px;
            border: 1px solid #ddd;
        }
        .step-header {
            margin-bottom: 10px;
            font-weight: bold;
        }
        .step-url {
            color: #0066cc;
            margin-bottom: 10px;
            word-break: break-all;
        }
        .screenshot-path {
            font-family: monospace;
            color: #666;
            margin: 10px 0;
            padding: 5px;
            background-color: #f8f8f8;
            border-radius: 4px;
            word-break: break-all;
            display: flex;
            align-items: center;
            justify-content: space-between;
        }
        .download-btn {
            margin-left: 10px;
            padding: 5px 10px;
            background-color: #1976d2;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-size: 14px;
        }
        .download-btn:hover {
            background-color: #1565c0;
        }
        .path-thumbnails {
            display: flex;
            gap: 10px;
            padding: 15px;
            margin-bottom: 20px;
            background-color: #f5f5f5;
            border-radius: 8px;
            overflow-x: auto;
            white-space: nowrap;
        }
        .thumbnail-container {
            position: relative;
            display: inline-block;
            border: 2px solid #ddd;
            border-radius: 4px;
            padding: 2px;
            background: white;
        }
        .thumbnail-image {
            height: 100px;
            width: auto;
            object-fit: contain;
            cursor: pointer;
            transition: transform 0.2s;
        }
        .thumbnail-image:hover {
            transform: scale(1.05);
        }
        .thumbnail-step-label {
            position: absolute;
            top: -10px;
            left: -10px;
            background-color: #1976d2;
            color: white;
            padding: 2px 6px;
            border-radius: 50%;
            font-size: 12px;
            z-index: 1;
        }
        /* 适配横向滚动 */
        .path-thumbnails::-webkit-scrollbar {
            height: 8px;
        }
        .path-thumbnails::-webkit-scrollbar-track {
            background: #f1f1f1;
            border-radius: 4px;
        }
        .path-thumbnails::-webkit-scrollbar-thumb {
            background: #888;
            border-radius: 4px;
        }
        .path-thumbnails::-webkit-scrollbar-thumb:hover {
            background: #555;
        }
        .url-count {
            margin-top: 5px;
            padding: 5px 10px;
            background-color: #f0f0f0;
            border-radius: 4px;
            font-size: 14px;
            color: #333;
            text-align: center;
            border: 1px solid #ddd;
        }
        .request-id-container {
            margin: 10px 0;
            display: flex;
            align-items: center;
            flex-wrap: wrap;
            gap: 10px;
        }
        .view-btn {
            padding: 5px 10px;
            background-color: #4CAF50;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-size: 14px;
        }
        .view-btn:hover {
            background-color: #45a049;
        }
        .json-modal-content {
            width: 80%;
            max-width: 1000px;
            max-height: 80vh;
            overflow: auto;
            background-color: white;
            padding: 20px;
            border-radius: 8px;
            position: relative;
        }
        .json-content {
            background-color: #f5f5f5;
            padding: 15px;
            border-radius: 5px;
            overflow: auto;
            max-height: 60vh;
            font-family: monospace;
            white-space: pre-wrap;
            word-break: break-all;
            text-align: left;
        }
        .close-btn {
            position: absolute;
            top: 10px;
            right: 15px;
            font-size: 24px;
            font-weight: bold;
            color: #333;
            cursor: pointer;
        }
        .close-btn:hover {
            color: #000;
        }
        .hljs {
            background: #f5f5f5;
            padding: 15px;
            border-radius: 5px;
        }
        .task-extract-container {
            margin-bottom: 20px;
            padding: 15px;
            background-color: #f5f5f5;
            border-radius: 5px;
            border-left: 4px solid #4CAF50;
        }
        .text-label {
            font-weight: bold;
            margin-bottom: 5px;
            color: #333;
        }
        .task-description, .process-description {
            margin-bottom: 15px;
            padding: 8px;
            background-color: white;
            border-radius: 3px;
            border: 1px solid #ddd;
        }
        .tab-container {
            margin-bottom: 10px;
            border-bottom: 1px solid #ccc;
        }
        .tab-button {
            background-color: #f1f1f1;
            border: none;
            padding: 8px 16px;
            cursor: pointer;
            transition: 0.3s;
        }
        .tab-button:hover {
            background-color: #ddd;
        }
        .tab-button.active {
            background-color: #ccc;
        }
        .axtree-content {
            max-height: 70vh;
            overflow: auto;
            white-space: pre-wrap;
            font-family: monospace;
            background-color: #f5f5f5;
            padding: 10px;
            border: 1px solid #ddd;
        }
    </style>
 </head>
 <body>
    <div class="length-selector">
        <select id="pathLength">
            <option value="1">长度: 1</option>
            <option value="2">长度: 2</option>
            <option value="3">长度: 3</option>
            <option value="4">长度: 4</option>
            <option value="5">长度: 5</option>
            <option value="6">长度: 6</option>
            <option value="7">长度: 7</option>
            <option value="8">长度: 8</option>
            <option value="9">长度: 9</option>
            <option value="10">长度: 10</option>
        </select>
    </div>
    <div class="search-container">
        <input type="text" id="urlSearch" placeholder="搜索URL...">
        <button id="randomUrl">Random URL</button>
    </div>
    <div class="current-url" id="currentUrl"></div>
    <div id="pathsContainer"></div>
    <div class="navigation">
        <button id="prevBtn">上一页</button>
        <button id="nextBtn">下一页</button>
    </div>
    <!-- 添加模态框 -->
    <div id="imageModal" class="modal">
        <div class="modal-content">
            <div class="modal-image-container">
                <img id="modalImage">
                <div id="modalBoundingBox" class="bounding-box"></div>
            </div>
        </div>
        <div class="zoom-controls">
            <button id="zoomIn">放大 (+)</button>
            <button id="zoomReset">重置</button>
            <button id="zoomOut">缩小 (-)</button>
            <button id="closeModal">关闭</button>
        </div>
    </div>
    <script src="show_path.js"></script>
 </body>
 </html>
--- a/misc/axtree_complete.js
+++ b/misc/axtree_complete.js
@ -0,0 +1,207 @@
 import { chromium } from 'playwright';
 import fs from 'fs';
 import path from 'path';
 import { fileURLToPath } from 'url';
 // Resolve the directory of this file (ES modules do not provide __dirname).
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
 /**
  * Open `url` in a fresh headless Chromium instance and render the page's
  * accessibility tree as indented text.
  *
  * Nodes with role `InlineTextBox`, and unnamed `none`/`generic` nodes that are
  * neither focusable, focused nor expandable, are not printed; their children
  * are emitted at the skipped node's depth. Every printed node receives a
  * sequential numeric id (starting at 1) which is also the key of its
  * pseudo-selector in `idToSelector`. Each emitted line is echoed to stdout.
  *
  * The browser is closed in a `finally` block so that a navigation or snapshot
  * failure does not leave a Chromium process running.
  *
  * @param {string} url - Address of the page to snapshot.
  * @returns {Promise<{axTreeOutput: string, idToSelector: Object<string, string>}>}
  *   The rendered tree (beginning with a `## AXTree:` header line, every line
  *   newline-terminated) and the id-to-selector map.
  * @throws Propagates any error raised while navigating or snapshotting.
  */
 async function getAXTreeForUrl(url) {
  const browser = await chromium.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'networkidle' });
    // Fixed extra wait so the page can get past its "Loading..." state.
    await page.waitForTimeout(10000);
    console.log(`页面已加载: ${url}`);

    // interestingOnly: false asks Playwright for every node, not a pruned tree.
    const axTree = await page.accessibility.snapshot({ interestingOnly: false });

    const clickableRoles = ['button', 'link', 'menuitem', 'tab', 'checkbox', 'radio', 'switch', 'option'];
    const idToSelector = {};
    let idCounter = 1;
    let axTreeOutput = '';

    // Append one line to the text output and mirror it on stdout.
    const emit = (line) => {
      axTreeOutput += `${line}\n`;
      console.log(line);
    };

    // True for nodes that are skipped in the output (their children are still visited).
    const isSkipped = (node) =>
      node.role === 'InlineTextBox' ||
      ((node.role === 'none' || node.role === 'generic') &&
        !node.name &&
        !node.focusable &&
        !node.focused &&
        node.expanded === undefined);

    function traverse(node, depth = 0, parent = null) {
      if (isSkipped(node)) {
        // Children of a skipped node keep the skipped node's depth.
        if (node.children && node.children.length > 0) {
          for (const child of node.children) {
            traverse(child, depth, node);
          }
        }
        return;
      }

      const currentId = idCounter++;

      // Pseudo-selector: optional "parent >>" prefix, role, name, state, sibling position.
      const selectorParts = [`role=${node.role}`];
      if (node.name) {
        selectorParts.push(`[name="${node.name}"]`);
      }
      if (node.selected) selectorParts.push('[selected=true]');
      if (node.checked !== undefined) selectorParts.push(`[checked=${node.checked}]`);
      if (node.pressed !== undefined) selectorParts.push(`[pressed=${node.pressed}]`);
      if (parent && parent.role !== 'WebArea') {
        let parentSelector = `role=${parent.role}`;
        if (parent.name) {
          parentSelector += `[name="${parent.name}"]`;
        }
        selectorParts.unshift(`${parentSelector} >>`);
      }
      if (parent && parent.children) {
        // 1-based position among the direct parent's children.
        const siblingIndex = parent.children.findIndex((child) => child === node);
        if (siblingIndex !== -1) {
          selectorParts.push(`:nth-match(${siblingIndex + 1})`);
        }
      }
      idToSelector[currentId] = selectorParts.join(' ');

      const props = [];
      if (node.focusable) props.push('focusable');
      if (node.focused) props.push('focused');
      if (node.expanded !== undefined) props.push(`expanded=${node.expanded}`);
      if (node.selected) props.push('selected');
      if (node.checked !== undefined) props.push(`checked=${node.checked}`);
      if (node.disabled) props.push('disabled');
      if (node.required) props.push('required');
      if (node.pressed !== undefined) props.push(`pressed=${node.pressed}`);
      // Clickable when the role is interactive or the node can take focus.
      if (clickableRoles.includes(node.role) || node.focusable) props.push('clickable');

      const indent = ' '.repeat(depth * 4);
      const suffix = props.length > 0 ? ` (${props.join(', ')})` : '';
      emit(`${indent}[${currentId}] ${node.role} '${node.name || ''}'${suffix}`);

      if (node.children && node.children.length > 0) {
        for (const child of node.children) {
          traverse(child, depth + 1, node);
        }
      }
    }

    emit('## AXTree:');

    // The root is printed as "Root<role>" and does not consume an id.
    const rootProps = [];
    if (axTree.focusable) rootProps.push('focusable=True');
    if (axTree.focused) rootProps.push('focused');
    const rootSuffix = rootProps.length > 0 ? `, ${rootProps.join(', ')}` : '';
    emit(`Root${axTree.role ? axTree.role : 'WebArea'} '${axTree.name || ''}'${rootSuffix}`);

    if (axTree.children && axTree.children.length > 0) {
      for (const child of axTree.children) {
        traverse(child, 1, axTree);
      }
    }

    return { axTreeOutput, idToSelector };
  } finally {
    // Always release the browser, including when navigation or the snapshot failed.
    await browser.close();
  }
 }
 /**
  * Re-capture saved accessibility-tree dumps that still contain the text
  * "Loading ...".
  *
  * For each `<id>.txt` in the `axtrees` directory next to this file that
  * contains the marker, the URL is read from
  * `storage/request_queues/default/<id>.json` (its `json` field is itself a
  * JSON string holding `url`), the tree is fetched again, and - only when the
  * fresh tree no longer contains the marker - the `.txt` file is overwritten
  * and `<id>_idToSelector.json` is written beside it. Per-file errors are
  * logged and do not stop the loop.
  *
  * @returns {Promise<void>}
  */
 async function processAXTreeFiles() {
  const loadingMarker = 'Loading ...';
  const treeDir = path.join(__dirname, 'axtrees');

  // Create the directory when missing so the listing below cannot fail on it.
  if (!fs.existsSync(treeDir)) {
    fs.mkdirSync(treeDir, { recursive: true });
  }

  const dumpNames = fs.readdirSync(treeDir).filter((entry) => entry.endsWith('.txt'));

  for (const file of dumpNames) {
    const filePath = path.join(treeDir, file);
    const isStale = fs.readFileSync(filePath, 'utf8').includes(loadingMarker);
    if (!isStale) {
      continue;
    }
    console.log(`文件 ${file} 包含 "Loading ..."，需要重新获取`);

    // The dump's base name is the id of the queue record.
    const id = path.basename(file, '.txt');
    const jsonPath = path.join(__dirname, 'storage', 'request_queues', 'default', `${id}.json`);
    if (!fs.existsSync(jsonPath)) {
      console.log(`找不到对应的 JSON 文件: ${jsonPath}`);
      continue;
    }

    try {
      const record = JSON.parse(fs.readFileSync(jsonPath, 'utf8'));
      const { url } = JSON.parse(record.json);
      console.log(`为 ID ${id} 找到 URL: ${url}`);

      const { axTreeOutput, idToSelector } = await getAXTreeForUrl(url);
      if (axTreeOutput.includes(loadingMarker)) {
        console.log(`警告: 新获取的 AXTree 仍然包含 "Loading ..."`);
        continue;
      }

      fs.writeFileSync(filePath, axTreeOutput);
      const selectorFilePath = path.join(treeDir, `${id}_idToSelector.json`);
      fs.writeFileSync(selectorFilePath, JSON.stringify(idToSelector, null, 2));
      console.log(`已更新 ${file} 和创建 ${id}_idToSelector.json`);
    } catch (error) {
      console.error(`处理 ${file} 时出错:`, error);
    }
  }
 }
 // Entry point: refresh every stale AXTree dump, then report the outcome.
 // Top-level await is available because this file is an ES module.
 try {
  await processAXTreeFiles();
  console.log('所有 AXTree 文件处理完成');
 } catch (failure) {
  console.error('处理过程中出错:', failure);
 }
--- a/misc/crawl_grafana_v18.log
+++ b/misc/crawl_grafana_v18.log
--- a/misc/get_axtree_v18.js
+++ b/misc/get_axtree_v18.js
@ -0,0 +1,254 @@
 const { chromium } = require('playwright');
 const fs = require('fs').promises;
 const path = require('path');
 /**
  * Open `url` in a new page of `browser`, click collapsed/expandable buttons
  * until a pass clicks nothing new, then render the page's accessibility tree
  * as indented text.
  *
  * Buttons are de-duplicated by their `aria-label` value, so each distinct
  * label is clicked at most once. Nodes with role `InlineTextBox`, and unnamed
  * `none`/`generic` nodes that are neither focusable, focused nor expandable,
  * are not printed; their children are emitted at the skipped node's depth.
  *
  * Errors raised after the page was created are not rethrown: the message is
  * stored in `error` and `axtree`/`idToSelector` keep their empty defaults.
  * The page is always closed.
  *
  * @param {string} url - Address of the page to snapshot.
  * @param {object} browser - Object exposing `newPage()` (a Playwright browser).
  * @returns {Promise<{axtree: string, idToSelector: Object<string, string>, error: (string|null)}>}
  */
 async function getAXTreeForUrl(url, browser) {
  const page = await browser.newPage();
  const result = { axtree: "", idToSelector: {}, error: null };

  try {
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
    await page.waitForTimeout(5000);

    // aria-label values that have already been clicked.
    const seenLabels = new Set();

    // One pass over the expander buttons; resolves to true when at least one
    // not-yet-seen label was clicked.
    const clickUnseenButtons = async () => {
      console.log('开始寻找可展开按钮...');
      const candidates = await page.$$('button[aria-label*="Expand"], button[aria-label*="Open"], button[aria-expanded="false"]');
      console.log(`找到 ${candidates.length} 个折叠按钮`);

      let clickedAny = false;
      for (const handle of candidates) {
        try {
          // Bring the button into view before inspecting or clicking it.
          await page.evaluate((el) => {
            el.scrollIntoView({ behavior: 'smooth', block: 'center' });
          }, handle);

          const label = await handle.getAttribute('aria-label');
          if (seenLabels.has(label)) {
            continue;
          }
          console.log(`点击新按钮: ${label}`);
          await handle.click();
          seenLabels.add(label);
          clickedAny = true;
          await page.waitForTimeout(200);
        } catch (err) {
          console.log(`点击失败: ${err.message}`);
        }
      }
      return clickedAny;
    };

    // Repeat passes until one of them clicks nothing new.
    for (let round = 1; ; round += 1) {
      console.log(`\n第 ${round} 次查找...`);
      const progressed = await clickUnseenButtons();
      if (!progressed) {
        console.log('没有发现新的可展开按钮，结束查找');
        break;
      }
      console.log(`已点击按钮数量: ${seenLabels.size}`);
      await page.waitForTimeout(500);
    }

    const axTree = await page.accessibility.snapshot({ interestingOnly: false });

    const interactiveRoles = ['button', 'link', 'menuitem', 'tab', 'checkbox', 'radio', 'switch', 'option'];
    const selectorsById = {};
    const lines = [];
    let nextId = 1;

    // Nodes that are left out of the output (their children are still walked).
    const isOmitted = (node) => {
      if (node.role === 'InlineTextBox') return true;
      if (node.role !== 'none' && node.role !== 'generic') return false;
      return !node.name && !node.focusable && !node.focused && node.expanded === undefined;
    };

    // Pseudo-selector: optional "parent >>" prefix, role, name, state, sibling position.
    const selectorFor = (node, parent) => {
      const parts = [`role=${node.role}`];
      if (node.name) parts.push(`[name="${node.name}"]`);
      if (node.selected) parts.push('[selected=true]');
      if (node.checked !== undefined) parts.push(`[checked=${node.checked}]`);
      if (node.pressed !== undefined) parts.push(`[pressed=${node.pressed}]`);
      if (parent && parent.role !== 'WebArea') {
        const parentName = parent.name ? `[name="${parent.name}"]` : '';
        parts.unshift(`role=${parent.role}${parentName} >>`);
      }
      if (parent?.children) {
        const position = parent.children.findIndex((sibling) => sibling === node);
        if (position !== -1) parts.push(`:nth-match(${position + 1})`);
      }
      return parts.join(' ');
    };

    // Property labels in their fixed output order.
    const propsFor = (node) => {
      const flags = [
        [node.focusable, 'focusable'],
        [node.focused, 'focused'],
        [node.expanded !== undefined, `expanded=${node.expanded}`],
        [node.selected, 'selected'],
        [node.checked !== undefined, `checked=${node.checked}`],
        [node.disabled, 'disabled'],
        [node.required, 'required'],
        [node.pressed !== undefined, `pressed=${node.pressed}`],
        [interactiveRoles.includes(node.role) || node.focusable, 'clickable'],
      ];
      return flags.filter(([enabled]) => enabled).map(([, label]) => label);
    };

    const walk = (node, depth, parent) => {
      // Omitted nodes pass their own depth on to their children.
      let childDepth = depth;
      if (!isOmitted(node)) {
        const id = nextId++;
        selectorsById[id] = selectorFor(node, parent);
        const props = propsFor(node);
        const suffix = props.length > 0 ? ` (${props.join(', ')})` : '';
        lines.push(`${' '.repeat(depth * 4)}[${id}] ${node.role} '${node.name || ''}'${suffix}`);
        childDepth = depth + 1;
      }
      if (node.children?.length > 0) {
        for (const child of node.children) {
          walk(child, childDepth, node);
        }
      }
    };

    // The root is printed as "Root<role>" and does not consume an id.
    const rootProps = [];
    if (axTree.focusable) rootProps.push('focusable=True');
    if (axTree.focused) rootProps.push('focused');
    const rootRole = axTree.role ? axTree.role : 'WebArea';
    const rootSuffix = rootProps.length > 0 ? `, ${rootProps.join(', ')}` : '';
    lines.push(`Root${rootRole} '${axTree.name || ''}'${rootSuffix}`);

    if (axTree.children?.length > 0) {
      for (const child of axTree.children) {
        walk(child, 1, axTree);
      }
    }

    result.axtree = lines.join('\n');
    result.idToSelector = selectorsById;
  } catch (error) {
    result.error = error.message;
    console.error(`Error processing URL ${url}:`, error);
  } finally {
    await page.close();
  }

  return result;
 }
 /**
  * Capture an accessibility tree for every distinct URL referenced by
  * `path/processed_3.json` and store the results, keyed by URL, in
  * `path/processed_3_axtree.json`.
  *
  * URLs are collected from the `chainUrls` arrays inside each entry's
  * `shortestPathsMeta` list and visited one at a time with a single shared
  * browser. The browser is launched only after the input has been parsed and
  * is closed in a `finally` block, so a failure in the loop does not leave a
  * Chromium process behind. Any error is logged, never rethrown; in that case
  * no output file is written.
  *
  * @returns {Promise<void>}
  */
 async function processUrls() {
  try {
    const inputData = JSON.parse(
      await fs.readFile('path/processed_3.json', 'utf8')
    );

    // Collect each URL once, preserving first-seen order.
    const uniqueUrls = new Set();
    for (const item of Object.values(inputData)) {
      for (const meta of item?.shortestPathsMeta ?? []) {
        for (const url of meta?.chainUrls ?? []) {
          uniqueUrls.add(url);
        }
      }
    }

    const results = {};
    const total = uniqueUrls.size;
    const browser = await chromium.launch({ headless: true });
    try {
      let processed = 0;
      for (const url of uniqueUrls) {
        processed++;
        console.log(`\n========== 处理 URL ${processed}/${total} ==========`);
        console.log(`URL: ${url}`);
        const result = await getAXTreeForUrl(url, browser);
        results[url] = result;

        if (result.error) {
          console.log(`获取 AXTree 失败: ${result.error}`);
        } else {
          console.log(`AXTree 获取成功，包含 ${Object.keys(result.idToSelector).length} 个节点`);
          console.log(`AXTree 预览 (前5行):`);
          // Split once and reuse for both the preview and the truncation notice.
          const treeLines = result.axtree.split('\n');
          console.log(treeLines.slice(0, 5).join('\n'));
          if (treeLines.length > 5) {
            console.log('... (更多内容已省略)');
          }
        }
        console.log(`========== URL ${processed}/${total} 处理完成 ==========\n`);
      }
    } finally {
      // Release the browser even when a URL failed unexpectedly.
      await browser.close();
    }

    await fs.writeFile(
      'path/processed_3_axtree.json',
      JSON.stringify(results, null, 2)
    );
    console.log('Processing complete. Results saved to path/processed_3_axtree.json');
  } catch (error) {
    console.error('Error in main process:', error);
  }
 }
 // Run the main program. processUrls() logs its own errors instead of rejecting.
 processUrls();
--- a/misc/show_path_0324_4omini.js
+++ b/misc/show_path_0324_4omini.js
@ -0,0 +1,528 @@
 class PathVisualizer {
    constructor() {
        this.currentData = null;
        this.currentIndex = 0;
        this.allData = {};
        this.urls = {};  // 改为对象，每个长度存储对应的URLs
        this.currentLength = 1;  // 当前选择的路径长度
        this.currentScale = 1;
        this.translateX = 0;
        this.translateY = 0;
        this.isDragging = false;
        this.currentModalBox = null;
        this.analysisData = {}; // 存储分析数据
        this.initializeEventListeners();
        this.initializeModal();
        this.loadData();
    }
    async loadData() {
        try {
            // 加载当前长度的数据
            const response = await fetch(`path/processed_${this.currentLength}.json`);
            const data = await response.json();
            this.allData = data;
            this.urls = Object.keys(this.allData);
            // 加载分析数据
            try {
                const analysisResponse = await fetch(`path/processed_${this.currentLength}_with_analysis.json`);
                this.analysisData = await analysisResponse.json();
            } catch (error) {
                console.error('加载分析数据失败:', error);
                this.analysisData = {};
            }
            // 更新URL总数显示
            this.updateUrlCount();
            this.showPath(0);
        } catch (error) {
            console.error('加载数据失败:', error);
        }
    }
    updateUrlCount() {
        // 创建或更新URL计数显示
        let urlCountElement = document.getElementById('urlCount');
        if (!urlCountElement) {
            urlCountElement = document.createElement('div');
            urlCountElement.id = 'urlCount';
            urlCountElement.className = 'url-count';
            // 获取长度选择器的父元素，并在其后插入计数元素
            const lengthSelector = document.getElementById('pathLength');
            lengthSelector.parentNode.appendChild(urlCountElement);
        }
        // 更新计数显示
        urlCountElement.textContent = `页面数: ${this.urls.length}`;
    }
    initializeEventListeners() {
        document.getElementById('prevBtn').addEventListener('click', () => this.showPrevious());
        document.getElementById('nextBtn').addEventListener('click', () => this.showNext());
        document.getElementById('urlSearch').addEventListener('input', (e) => this.handleSearch(e));
        // 添加随机URL按钮的事件监听
        document.getElementById('randomUrl').addEventListener('click', () => this.showRandomUrl());
        // 添加路径长度选择事件
        document.getElementById('pathLength').addEventListener('change', (e) => {
            this.currentLength = parseInt(e.target.value);
            this.currentIndex = 0;  // 重置索引
            this.loadData();  // 重新加载数据
        });
    }
    initializeModal() {
        const modal = document.getElementById('imageModal');
        const modalImg = document.getElementById('modalImage');
        const zoomIn = document.getElementById('zoomIn');
        const zoomOut = document.getElementById('zoomOut');
        const zoomReset = document.getElementById('zoomReset');
        const closeModalBtn = document.getElementById('closeModal');
        // 关闭模态框
        const closeModal = () => {
            modal.style.display = 'none';
            this.currentScale = 1;
            this.translateX = 0;
            this.translateY = 0;
            this.updateModalImageScale();
        };
        closeModalBtn.onclick = closeModal;
        // 点击空白处关闭
        modal.onclick = (event) => {
            if (event.target === modal) {
                closeModal();
            }
        };
        // 鼠标拖拽
        modalImg.onmousedown = (e) => {
            this.isDragging = true;
            this.startX = e.clientX - this.translateX;
            this.startY = e.clientY - this.translateY;
            e.preventDefault();
        };
        document.onmousemove = (e) => {
            if (!this.isDragging) return;
            this.translateX = e.clientX - this.startX;
            this.translateY = e.clientY - this.startY;
            this.updateModalImageScale();
        };
        document.onmouseup = () => {
            this.isDragging = false;
        };
        // 优化滚轮缩放，以鼠标位置为中心，降低灵敏度
        modal.addEventListener('wheel', (e) => {
            e.preventDefault();
            // 获取鼠标相对于图片的位置
            const rect = modalImg.getBoundingClientRect();
            const mouseX = e.clientX - rect.left;
            const mouseY = e.clientY - rect.top;
            // 计算鼠标在图片上的相对位置（考虑当前的变换）
            const x = (mouseX - this.translateX) / this.currentScale;
            const y = (mouseY - this.translateY) / this.currentScale;
            // 计算缩放比例，降低灵敏度
            const delta = e.deltaY > 0 ? 0.95 : 1.05;  // 从0.9/1.1改为0.95/1.05
            const newScale = this.currentScale * delta;
            // 限制最大和最小缩放比例
            const limitedScale = Math.min(Math.max(newScale, 0.1), 10);  // 限制在0.1到10倍之间
            // 只有当缩放比例在限制范围内才应用变换
            if (limitedScale !== this.currentScale) {
                // 计算新的平移值，保持鼠标位置不变
                this.translateX = mouseX - x * limitedScale;
                this.translateY = mouseY - y * limitedScale;
                this.currentScale = limitedScale;
                this.updateModalImageScale();
            }
        }, { passive: false });
        // 按钮缩放
        zoomIn.onclick = () => {
            this.currentScale *= 1.2;
            this.updateModalImageScale();
        };
        zoomOut.onclick = () => {
            this.currentScale /= 1.2;
            this.updateModalImageScale();
        };
        zoomReset.onclick = () => {
            this.currentScale = 1;
            this.translateX = 0;
            this.translateY = 0;
            this.updateModalImageScale();
        };
    }
    handleSearch(event) {
        const searchTerm = event.target.value.toLowerCase();
        const foundIndex = this.urls.findIndex(url => url.toLowerCase().includes(searchTerm));
        if (foundIndex !== -1) {
            this.showPath(foundIndex);
        }
    }
    showRandomUrl() {
        if (this.urls && this.urls.length > 0) {
            // 生成随机索引
            const randomIndex = Math.floor(Math.random() * this.urls.length);
            // 显示随机选中的路径
            this.showPath(randomIndex);
        }
    }
    showPath(index) {
        if (index < 0 || index >= this.urls.length) return;
        this.currentIndex = index;
        const url = this.urls[index];
        this.currentData = this.allData[url];
        document.getElementById('currentUrl').textContent = `[长度: ${this.currentLength}] ${url}`;
        this.renderPaths();
    }
    showPrevious() {
        this.showPath(this.currentIndex - 1);
    }
    showNext() {
        this.showPath(this.currentIndex + 1);
    }
    openModal(img) {
        const modal = document.getElementById('imageModal');
        const modalImg = document.getElementById('modalImage');
        modal.style.display = 'block';
        modalImg.src = img.src;
        // 重置缩放和位置
        this.currentScale = 1;
        this.translateX = 0;
        this.translateY = 0;
        this.updateModalImageScale();
    }
    updateModalImageScale() {
        const modalImg = document.getElementById('modalImage');
        const modalBox = document.getElementById('modalBoundingBox');
        // 应用缩放和平移
        modalImg.style.transform = `translate(${this.translateX}px, ${this.translateY}px) scale(${this.currentScale})`;
        modalImg.style.transformOrigin = '0 0';  // 设置变换原点为左上角
        if (this.currentModalBox) {
            modalBox.style.transform = `translate(${this.translateX}px, ${this.translateY}px) scale(${this.currentScale})`;
            modalBox.style.transformOrigin = '0 0';
        }
    }
    async renderPaths() {
        const container = document.getElementById('pathsContainer');
        container.innerHTML = '';
        // 获取当前URL的分析数据
        const currentUrl = this.urls[this.currentIndex];
        const analysisForUrl = this.analysisData[currentUrl] || {};
        // 添加任务描述区域 - 即使没有数据也显示空白框架
        const taskContainer = document.createElement('div');
        taskContainer.className = 'task-extract-container';
        // 添加任务标题
        const titleLabel = document.createElement('div');
        titleLabel.className = 'text-label';
        titleLabel.textContent = '任务描述:';
        taskContainer.appendChild(titleLabel);
        const titleText = document.createElement('div');
        titleText.className = 'task-description';
        // 如果有数据则显示，否则留空
        if (analysisForUrl.shortestPathsMeta && analysisForUrl.shortestPathsMeta.length > 0) {
            titleText.textContent = analysisForUrl.shortestPathsMeta[0].title || '';
        }
        taskContainer.appendChild(titleText);
        // 添加过程描述
        const descLabel = document.createElement('div');
        descLabel.className = 'text-label';
        descLabel.textContent = '过程描述:';
        taskContainer.appendChild(descLabel);
        const descText = document.createElement('div');
        descText.className = 'process-description';
        // 如果有数据则显示，否则留空
        if (analysisForUrl.shortestPathsMeta && analysisForUrl.shortestPathsMeta.length > 0) {
            descText.textContent = analysisForUrl.shortestPathsMeta[0].description || '';
        }
        taskContainer.appendChild(descText);
        container.appendChild(taskContainer);
        for (const path of this.currentData.shortestPaths) {
            const pathDiv = document.createElement('div');
            pathDiv.className = 'path-container';
            const meta = this.currentData.shortestPathsMeta.find(m => 
                JSON.stringify(m.chainIDs) === JSON.stringify(path));
            if (!meta) continue;
            // 添加缩略图预览区域
            const thumbnailsDiv = document.createElement('div');
            thumbnailsDiv.className = 'path-thumbnails';
            // 创建所有步骤的缩略图
            for (let i = 0; i < path.length; i++) {
                const thumbDiv = document.createElement('div');
                thumbDiv.className = 'thumbnail-container';
                // 添加步骤编号
                const stepLabel = document.createElement('div');
                stepLabel.className = 'thumbnail-step-label';
                stepLabel.textContent = `${i}`;
                thumbDiv.appendChild(stepLabel);
                // 添加缩略图，使用 chainChildNum 而不是滚动位置
                const imgPath = i === path.length - 1 
                    ? `screenshots/${path[i]}_full.png`
                    : `screenshots/${path[i]}_${meta.chainChildNum[i]}.png`;
                const thumbImg = document.createElement('img');
                thumbImg.src = imgPath;
                thumbImg.className = 'thumbnail-image';
                thumbImg.onclick = () => this.openModal(thumbImg);
                thumbDiv.appendChild(thumbImg);
                thumbnailsDiv.appendChild(thumbDiv);
            }
            pathDiv.appendChild(thumbnailsDiv);
            for (let i = 0; i < path.length; i++) {
                const stepDiv = document.createElement('div');
                stepDiv.className = 'step-container';
                const stepHeader = document.createElement('div');
                stepHeader.className = 'step-header';
                stepHeader.textContent = `步骤 ${i}`;
                stepDiv.appendChild(stepHeader);
                const urlDiv = document.createElement('div');
                urlDiv.className = 'step-url';
                urlDiv.textContent = `URL: ${meta.chainUrls[i]}`;
                stepDiv.appendChild(urlDiv);
                // 添加request ID显示和按钮区域
                const requestId = path[i];
                const requestIdDiv = document.createElement('div');
                requestIdDiv.className = 'request-id-container';
                // 显示request ID
                const requestIdLabel = document.createElement('span');
                requestIdLabel.textContent = `request id: ${requestId}`;
                requestIdDiv.appendChild(requestIdLabel);
                // 添加查看child按钮
                const viewChildBtn = document.createElement('button');
                viewChildBtn.className = 'view-btn';
                viewChildBtn.textContent = '查看child';
                viewChildBtn.onclick = () => this.viewChildJson(requestId);
                requestIdDiv.appendChild(viewChildBtn);
                // 添加查看request queue按钮
                const viewQueueBtn = document.createElement('button');
                viewQueueBtn.className = 'view-btn';
                viewQueueBtn.textContent = '查看request queue';
                viewQueueBtn.onclick = () => this.viewRequestQueueJson(requestId);
                requestIdDiv.appendChild(viewQueueBtn);
                stepDiv.appendChild(requestIdDiv);
                if (i < meta.chainTexts.length) {
                    const textDiv = document.createElement('div');
                    textDiv.className = 'path-info';
                    textDiv.textContent = `点击文本: ${meta.chainTexts[i]}`;
                    stepDiv.appendChild(textDiv);
                }
                // 构建并显示截图路径，使用 chainChildNum 而不是滚动位置
                const imgPath = i === path.length - 1 
                    ? `screenshots/${path[i]}_full.png`
                    : `screenshots/${path[i]}_${meta.chainChildNum[i]}.png`;
                const screenshotPathDiv = document.createElement('div');
                screenshotPathDiv.className = 'screenshot-path';
                screenshotPathDiv.textContent = `截图路径: ${imgPath}`;
                // 添加下载按钮
                const downloadBtn = document.createElement('button');
                downloadBtn.className = 'download-btn';
                downloadBtn.textContent = '下载截图';
                downloadBtn.onclick = () => this.downloadImage(imgPath);
                screenshotPathDiv.appendChild(downloadBtn);
                stepDiv.appendChild(screenshotPathDiv);
                const screenshotDiv = document.createElement('div');
                screenshotDiv.className = 'screenshot-container';
                const img = document.createElement('img');
                img.src = imgPath;
                img.onclick = () => this.openModal(img);
                img.onload = () => {
                    if (i < meta.chainViewportBoundingBoxes.length && i !== path.length - 1) {
                        const box = meta.chainViewportBoundingBoxes[i];
                        const boundingBox = document.createElement('div');
                        boundingBox.className = 'bounding-box';
                        const scale = img.width / img.naturalWidth;
                        boundingBox.style.left = `${box.x * scale}px`;
                        boundingBox.style.top = `${box.y * scale}px`;
                        boundingBox.style.width = `${box.width * scale}px`;
                        boundingBox.style.height = `${box.height * scale}px`;
                        screenshotDiv.appendChild(boundingBox);
                    }
                };
                screenshotDiv.appendChild(img);
                stepDiv.appendChild(screenshotDiv);
                pathDiv.appendChild(stepDiv);
            }
            container.appendChild(pathDiv);
        }
    }
    // 添加查看child JSON的方法
    async viewChildJson(requestId) {
        try {
            const response = await fetch(`path/childs/${requestId}.json`);
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            const data = await response.json();
            this.showJsonModal('Child JSON', data);
        } catch (error) {
            console.error('加载child JSON失败:', error);
            alert(`加载child JSON失败: ${error.message}`);
        }
    }
    // 添加查看request queue JSON的方法
    async viewRequestQueueJson(requestId) {
        try {
            const response = await fetch(`storage/request_queues/default/${requestId}.json`);
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            const data = await response.json();
            this.showJsonModal('Request Queue JSON', data);
        } catch (error) {
            console.error('加载request queue JSON失败:', error);
            alert(`加载request queue JSON失败: ${error.message}`);
        }
    }
    // 显示JSON模态框
    showJsonModal(title, jsonData) {
        // 检查是否已存在JSON模态框，如果不存在则创建
        let jsonModal = document.getElementById('jsonModal');
        if (!jsonModal) {
            jsonModal = document.createElement('div');
            jsonModal.id = 'jsonModal';
            jsonModal.className = 'modal';
            const modalContent = document.createElement('div');
            modalContent.className = 'modal-content json-modal-content';
            const closeBtn = document.createElement('span');
            closeBtn.className = 'close-btn';
            closeBtn.innerHTML = '&times;';
            closeBtn.onclick = () => { jsonModal.style.display = 'none'; };
            const modalTitle = document.createElement('h3');
            modalTitle.id = 'jsonModalTitle';
            const preElement = document.createElement('pre');
            preElement.id = 'jsonContent';
            preElement.className = 'json-content';
            modalContent.appendChild(closeBtn);
            modalContent.appendChild(modalTitle);
            modalContent.appendChild(preElement);
            jsonModal.appendChild(modalContent);
            document.body.appendChild(jsonModal);
            // 点击模态框外部关闭
            jsonModal.onclick = (event) => {
                if (event.target === jsonModal) {
                    jsonModal.style.display = 'none';
                }
            };
        }
        // 更新模态框内容
        document.getElementById('jsonModalTitle').textContent = title;
        // 格式化并高亮JSON
        const formattedJson = JSON.stringify(jsonData, null, 2);
        const preElement = document.getElementById('jsonContent');
        preElement.textContent = formattedJson;
        // 如果有语法高亮库（如highlight.js），可以在这里应用
        if (window.hljs) {
            preElement.innerHTML = window.hljs.highlight('json', formattedJson).value;
        }
        // 显示模态框
        jsonModal.style.display = 'block';
    }
    // 添加下载图片的方法
    async downloadImage(imgPath) {
        try {
            const response = await fetch(imgPath);
            const blob = await response.blob();
            const url = window.URL.createObjectURL(blob);
            const a = document.createElement('a');
            a.href = url;
            a.download = imgPath.split('/').pop(); // 使用原始文件名
            document.body.appendChild(a);
            a.click();
            window.URL.revokeObjectURL(url);
            document.body.removeChild(a);
        } catch (error) {
            console.error('下载图片失败:', error);
            alert('下载图片失败，请重试');
        }
    }
 }
 // Create the visualizer once the document has been parsed.
 document.addEventListener('DOMContentLoaded', function onDomReady() {
    new PathVisualizer();
 });
--- a/misc/show_path_qa.js
+++ b/misc/show_path_qa.js
@ -0,0 +1,527 @@
 /**
  * Browser-side viewer for crawled navigation paths with extracted Q&A tasks.
  *
  * Loads `path/processed_<length>.json` (shortest paths per URL) and
  * `path/processed_<length>_with_analysis.json` (task summaries per URL), then
  * renders, for the selected URL, the question/answer pairs and every shortest
  * path step with its screenshot. Also drives a zoomable, draggable image modal
  * and a JSON viewer modal.
  */
 class PathVisualizer {
    constructor() {
        this.currentData = null;
        this.currentIndex = 0;
        this.allData = {};
        this.urls = [];  // URLs (keys of allData) for the currently selected path length
        this.currentLength = 1;  // currently selected path length
        this.currentScale = 1;
        this.translateX = 0;
        this.translateY = 0;
        this.isDragging = false;
        this.currentModalBox = null;
        this.analysisData = {}; // analysis data keyed by URL
        this.initializeEventListeners();
        this.initializeModal();
        this.loadData();
    }

    /**
     * Load path data and analysis data for `this.currentLength`, then show the first URL.
     *
     * A missing or broken analysis file is tolerated (analysis becomes `{}`).
     * If the selected length changes while requests are in flight, the stale
     * result is discarded so it cannot overwrite the newer selection.
     *
     * @returns {Promise<void>}
     */
    async loadData() {
        const requestedLength = this.currentLength;
        try {
            const response = await fetch(`path/processed_${requestedLength}.json`);
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            const data = await response.json();

            let analysisData = {};
            try {
                const analysisResponse = await fetch(`path/processed_${requestedLength}_with_analysis.json`);
                if (!analysisResponse.ok) {
                    throw new Error(`HTTP error! status: ${analysisResponse.status}`);
                }
                analysisData = (await analysisResponse.json()) || {};
            } catch (error) {
                console.error('加载分析数据失败:', error);
                analysisData = {};
            }

            // Another length was chosen meanwhile; this response is stale.
            if (requestedLength !== this.currentLength) return;

            this.allData = data;
            this.urls = Object.keys(this.allData);
            this.analysisData = analysisData;
            this.updateUrlCount();
            this.showPath(0);
        } catch (error) {
            console.error('加载数据失败:', error);
        }
    }

    /** Create (once) and refresh the element showing how many URLs are loaded. */
    updateUrlCount() {
        let urlCountElement = document.getElementById('urlCount');
        if (!urlCountElement) {
            urlCountElement = document.createElement('div');
            urlCountElement.id = 'urlCount';
            urlCountElement.className = 'url-count';
            // Append the counter inside the parent of the length selector.
            const lengthSelector = document.getElementById('pathLength');
            lengthSelector.parentNode.appendChild(urlCountElement);
        }
        urlCountElement.textContent = `页面数: ${this.urls.length}`;
    }

    /** Wire the navigation buttons, the search box and the path-length selector. */
    initializeEventListeners() {
        document.getElementById('prevBtn').addEventListener('click', () => this.showPrevious());
        document.getElementById('nextBtn').addEventListener('click', () => this.showNext());
        document.getElementById('urlSearch').addEventListener('input', (e) => this.handleSearch(e));
        document.getElementById('randomUrl').addEventListener('click', () => this.showRandomUrl());
        document.getElementById('pathLength').addEventListener('change', (e) => {
            const selectedLength = Number.parseInt(e.target.value, 10);
            // Ignore values that are not numbers instead of requesting processed_NaN.json.
            if (Number.isNaN(selectedLength)) return;
            this.currentLength = selectedLength;
            this.currentIndex = 0;  // restart from the first URL of the new data set
            this.loadData();
        });
    }

    /**
     * Clamp a zoom factor to the supported range of 0.1x to 10x.
     * @param {number} scale
     * @returns {number}
     */
    clampScale(scale) {
        return Math.min(Math.max(scale, 0.1), 10);
    }

    /** Set up closing, dragging, wheel zoom and button zoom for the image modal. */
    initializeModal() {
        const modal = document.getElementById('imageModal');
        const modalImg = document.getElementById('modalImage');
        const zoomIn = document.getElementById('zoomIn');
        const zoomOut = document.getElementById('zoomOut');
        const zoomReset = document.getElementById('zoomReset');
        const closeModalBtn = document.getElementById('closeModal');

        const resetView = () => {
            this.currentScale = 1;
            this.translateX = 0;
            this.translateY = 0;
            this.updateModalImageScale();
        };
        const closeModal = () => {
            modal.style.display = 'none';
            resetView();
        };
        closeModalBtn.onclick = closeModal;
        // Clicking the backdrop (not the image) closes the modal.
        modal.onclick = (event) => {
            if (event.target === modal) {
                closeModal();
            }
        };

        // Drag to pan.
        modalImg.onmousedown = (e) => {
            this.isDragging = true;
            this.startX = e.clientX - this.translateX;
            this.startY = e.clientY - this.translateY;
            e.preventDefault();
        };
        // addEventListener keeps any other document-level handlers intact.
        document.addEventListener('mousemove', (e) => {
            if (!this.isDragging) return;
            this.translateX = e.clientX - this.startX;
            this.translateY = e.clientY - this.startY;
            this.updateModalImageScale();
        });
        document.addEventListener('mouseup', () => {
            this.isDragging = false;
        });

        // Wheel zoom anchored at the pointer, in 5% steps.
        modal.addEventListener('wheel', (e) => {
            e.preventDefault();
            const factor = e.deltaY > 0 ? 0.95 : 1.05;
            const nextScale = this.clampScale(this.currentScale * factor);
            if (nextScale === this.currentScale) return;
            // getBoundingClientRect already reflects the current translate/scale,
            // so these offsets are the pointer position inside the transformed image.
            const rect = modalImg.getBoundingClientRect();
            const offsetX = e.clientX - rect.left;
            const offsetY = e.clientY - rect.top;
            // Keep the image point under the pointer fixed:
            // t' = (offset + t) - (offset / s) * s' = t + offset * (1 - s'/s)
            const ratio = nextScale / this.currentScale;
            this.translateX += offsetX * (1 - ratio);
            this.translateY += offsetY * (1 - ratio);
            this.currentScale = nextScale;
            this.updateModalImageScale();
        }, { passive: false });

        // Button zoom uses the same limits as the wheel.
        zoomIn.onclick = () => {
            this.currentScale = this.clampScale(this.currentScale * 1.2);
            this.updateModalImageScale();
        };
        zoomOut.onclick = () => {
            this.currentScale = this.clampScale(this.currentScale / 1.2);
            this.updateModalImageScale();
        };
        zoomReset.onclick = resetView;
    }

    /**
     * Jump to the first URL containing the search text (case-insensitive).
     * An empty search box or an unloaded URL list is ignored.
     * @param {Event} event - `input` event of the search field.
     */
    handleSearch(event) {
        const searchTerm = event.target.value.toLowerCase();
        if (searchTerm === '' || !Array.isArray(this.urls)) return;
        const foundIndex = this.urls.findIndex(url => url.toLowerCase().includes(searchTerm));
        if (foundIndex !== -1) {
            this.showPath(foundIndex);
        }
    }

    /** Show a randomly chosen URL; does nothing when no URLs are loaded. */
    showRandomUrl() {
        if (this.urls && this.urls.length > 0) {
            const randomIndex = Math.floor(Math.random() * this.urls.length);
            this.showPath(randomIndex);
        }
    }

    /**
     * Select the URL at `index`, update the header line and re-render.
     * Non-integer or out-of-range indices are ignored.
     * @param {number} index - position in `this.urls`.
     */
    showPath(index) {
        if (!Array.isArray(this.urls) || !Number.isInteger(index)) return;
        if (index < 0 || index >= this.urls.length) return;
        this.currentIndex = index;
        const url = this.urls[index];
        this.currentData = this.allData[url];
        document.getElementById('currentUrl').textContent = `[长度: ${this.currentLength}] ${url}`;
        // renderPaths is async: report failures instead of leaving an unhandled rejection.
        this.renderPaths().catch((error) => {
            console.error('渲染路径失败:', error);
        });
    }

    showPrevious() {
        this.showPath(this.currentIndex - 1);
    }

    showNext() {
        this.showPath(this.currentIndex + 1);
    }

    /**
     * Open the image modal for `img`, starting at scale 1 with no offset.
     * @param {{src: string}} img - element whose `src` is shown enlarged.
     */
    openModal(img) {
        const modal = document.getElementById('imageModal');
        const modalImg = document.getElementById('modalImage');
        modal.style.display = 'block';
        modalImg.src = img.src;
        this.currentScale = 1;
        this.translateX = 0;
        this.translateY = 0;
        this.updateModalImageScale();
    }

    /** Apply the current pan/zoom to the modal image (and the modal bounding box, if active). */
    updateModalImageScale() {
        const modalImg = document.getElementById('modalImage');
        const modalBox = document.getElementById('modalBoundingBox');
        const transform = `translate(${this.translateX}px, ${this.translateY}px) scale(${this.currentScale})`;
        modalImg.style.transform = transform;
        modalImg.style.transformOrigin = '0 0';  // transform around the top-left corner
        if (this.currentModalBox && modalBox) {
            modalBox.style.transform = transform;
            modalBox.style.transformOrigin = '0 0';
        }
    }

    /**
     * Create a `<div>` with a class name and optional text.
     * @param {string} className
     * @param {string} [text]
     * @returns {HTMLElement}
     */
    createDiv(className, text) {
        const div = document.createElement('div');
        div.className = className;
        if (text !== undefined) div.textContent = text;
        return div;
    }

    /**
     * Screenshot file for step `i` of `path`: the last step uses the full-page
     * capture, earlier steps use the capture keyed by `meta.chainChildNum[i]`.
     * @param {string[]} path - request ids of the path.
     * @param {object} meta - meta entry matching the path.
     * @param {number} i - step index.
     * @returns {string}
     */
    getStepImagePath(path, meta, i) {
        if (i === path.length - 1) {
            return `screenshots/${path[i]}_full.png`;
        }
        const childNums = meta.chainChildNum || [];
        return `screenshots/${path[i]}_${childNums[i]}.png`;
    }

    /**
     * Build the question/answer block for a URL from the `task_summaries` of its
     * first analysis entry. Returns an empty container when the URL has no
     * analysis data, so a missing analysis file cannot break rendering.
     * @param {string} currentUrl
     * @returns {HTMLElement}
     */
    buildTaskSummaries(currentUrl) {
        const taskContainer = this.createDiv('task-extract-container');
        const analysisForUrl = (this.analysisData && this.analysisData[currentUrl]) || {};
        const metaList = analysisForUrl.shortestPathsMeta;
        const firstMeta = Array.isArray(metaList) ? metaList[0] : null;
        const summaries = firstMeta && Array.isArray(firstMeta.task_summaries)
            ? firstMeta.task_summaries
            : [];
        for (const summary of summaries) {
            if (!summary) continue;
            taskContainer.appendChild(this.createDiv('text-label', '提炼任务:'));
            taskContainer.appendChild(this.createDiv('task-description', summary.question || ''));
            taskContainer.appendChild(this.createDiv('text-label', '参考答案:'));
            taskContainer.appendChild(this.createDiv('process-description', summary.answer || ''));
        }
        return taskContainer;
    }

    /**
     * Build the strip of numbered thumbnails for one path.
     * @param {string[]} path
     * @param {object} meta
     * @returns {HTMLElement}
     */
    buildThumbnails(path, meta) {
        const thumbnailsDiv = this.createDiv('path-thumbnails');
        for (let i = 0; i < path.length; i++) {
            const thumbDiv = this.createDiv('thumbnail-container');
            thumbDiv.appendChild(this.createDiv('thumbnail-step-label', `${i}`));
            const thumbImg = document.createElement('img');
            thumbImg.src = this.getStepImagePath(path, meta, i);
            thumbImg.className = 'thumbnail-image';
            thumbImg.onclick = () => this.openModal(thumbImg);
            thumbDiv.appendChild(thumbImg);
            thumbnailsDiv.appendChild(thumbDiv);
        }
        return thumbnailsDiv;
    }

    /**
     * Build the detail card for step `i` of a path: URL, request id with JSON
     * viewer buttons, clicked text, screenshot path with download button, and
     * the screenshot with the clicked element's bounding box.
     * @param {string[]} path
     * @param {object} meta
     * @param {number} i
     * @returns {HTMLElement}
     */
    buildStep(path, meta, i) {
        const chainUrls = meta.chainUrls || [];
        const chainTexts = meta.chainTexts || [];
        const chainBoxes = meta.chainViewportBoundingBoxes || [];
        const makeButton = (className, label, onClick) => {
            const button = document.createElement('button');
            button.className = className;
            button.textContent = label;
            button.onclick = onClick;
            return button;
        };

        const stepDiv = this.createDiv('step-container');
        stepDiv.appendChild(this.createDiv('step-header', `步骤 ${i}`));
        stepDiv.appendChild(this.createDiv('step-url', `URL: ${chainUrls[i]}`));

        const requestId = path[i];
        const requestIdDiv = this.createDiv('request-id-container');
        const requestIdLabel = document.createElement('span');
        requestIdLabel.textContent = `request id: ${requestId}`;
        requestIdDiv.appendChild(requestIdLabel);
        requestIdDiv.appendChild(makeButton('view-btn', '查看child', () => this.viewChildJson(requestId)));
        requestIdDiv.appendChild(makeButton('view-btn', '查看request queue', () => this.viewRequestQueueJson(requestId)));
        stepDiv.appendChild(requestIdDiv);

        if (i < chainTexts.length) {
            stepDiv.appendChild(this.createDiv('path-info', `点击文本: ${chainTexts[i]}`));
        }

        const imgPath = this.getStepImagePath(path, meta, i);
        const screenshotPathDiv = this.createDiv('screenshot-path', `截图路径: ${imgPath}`);
        screenshotPathDiv.appendChild(makeButton('download-btn', '下载截图', () => this.downloadImage(imgPath)));
        stepDiv.appendChild(screenshotPathDiv);

        const screenshotDiv = this.createDiv('screenshot-container');
        const img = document.createElement('img');
        img.onclick = () => this.openModal(img);
        img.onload = () => {
            // The final step has no outgoing click, so no box is drawn for it.
            if (i >= chainBoxes.length || i === path.length - 1) return;
            const box = chainBoxes[i];
            // Box coordinates are scaled by displayed width / natural width.
            const scale = img.width / img.naturalWidth;
            const boundingBox = this.createDiv('bounding-box');
            boundingBox.style.left = `${box.x * scale}px`;
            boundingBox.style.top = `${box.y * scale}px`;
            boundingBox.style.width = `${box.width * scale}px`;
            boundingBox.style.height = `${box.height * scale}px`;
            screenshotDiv.appendChild(boundingBox);
        };
        img.src = imgPath;
        screenshotDiv.appendChild(img);
        stepDiv.appendChild(screenshotDiv);
        return stepDiv;
    }

    /**
     * Rebuild `#pathsContainer` for the current URL: the Q&A block first, then
     * one section per shortest path that has a meta entry with identical
     * `chainIDs`. Missing path data leaves only the Q&A block.
     * @returns {Promise<void>}
     */
    async renderPaths() {
        const container = document.getElementById('pathsContainer');
        container.innerHTML = '';
        const currentUrl = this.urls[this.currentIndex];
        container.appendChild(this.buildTaskSummaries(currentUrl));

        const data = this.currentData;
        if (!data || !Array.isArray(data.shortestPaths) || !Array.isArray(data.shortestPathsMeta)) {
            return;
        }
        for (const path of data.shortestPaths) {
            // Serialise the path once; it is compared against every meta entry.
            const pathKey = JSON.stringify(path);
            const meta = data.shortestPathsMeta.find(m => m && JSON.stringify(m.chainIDs) === pathKey);
            if (!meta) continue;
            const pathDiv = this.createDiv('path-container');
            pathDiv.appendChild(this.buildThumbnails(path, meta));
            for (let i = 0; i < path.length; i++) {
                pathDiv.appendChild(this.buildStep(path, meta, i));
            }
            container.appendChild(pathDiv);
        }
    }

    /**
     * Fetch a JSON file and show it in the JSON modal. Failures are logged
     * and reported via `alert`.
     * @param {string} url - location of the JSON file.
     * @param {string} title - modal heading.
     * @param {string} failureMessage - prefix used for the error log and alert.
     * @returns {Promise<void>}
     */
    async fetchAndShowJson(url, title, failureMessage) {
        try {
            const response = await fetch(url);
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            const data = await response.json();
            this.showJsonModal(title, data);
        } catch (error) {
            console.error(`${failureMessage}:`, error);
            alert(`${failureMessage}: ${error.message}`);
        }
    }

    /** Show `path/childs/<requestId>.json` in the JSON modal. */
    async viewChildJson(requestId) {
        await this.fetchAndShowJson(`path/childs/${requestId}.json`, 'Child JSON', '加载child JSON失败');
    }

    /** Show `storage/request_queues/default/<requestId>.json` in the JSON modal. */
    async viewRequestQueueJson(requestId) {
        await this.fetchAndShowJson(
            `storage/request_queues/default/${requestId}.json`,
            'Request Queue JSON',
            '加载request queue JSON失败',
        );
    }

    /**
     * Show `jsonData` pretty-printed in a modal that is created on first use.
     * Syntax highlighting through `window.hljs` is optional; if it fails the
     * plain text stays in place and the modal is still shown.
     * @param {string} title - modal heading.
     * @param {*} jsonData - value serialised with `JSON.stringify(value, null, 2)`.
     */
    showJsonModal(title, jsonData) {
        let jsonModal = document.getElementById('jsonModal');
        if (!jsonModal) {
            jsonModal = document.createElement('div');
            jsonModal.id = 'jsonModal';
            jsonModal.className = 'modal';
            const modalContent = document.createElement('div');
            modalContent.className = 'modal-content json-modal-content';
            const closeBtn = document.createElement('span');
            closeBtn.className = 'close-btn';
            closeBtn.innerHTML = '&times;';
            closeBtn.onclick = () => { jsonModal.style.display = 'none'; };
            const modalTitle = document.createElement('h3');
            modalTitle.id = 'jsonModalTitle';
            const preElement = document.createElement('pre');
            preElement.id = 'jsonContent';
            preElement.className = 'json-content';
            modalContent.appendChild(closeBtn);
            modalContent.appendChild(modalTitle);
            modalContent.appendChild(preElement);
            jsonModal.appendChild(modalContent);
            document.body.appendChild(jsonModal);
            // Clicking the backdrop (not the content) closes the modal.
            jsonModal.onclick = (event) => {
                if (event.target === jsonModal) {
                    jsonModal.style.display = 'none';
                }
            };
        }
        document.getElementById('jsonModalTitle').textContent = title;
        const formattedJson = JSON.stringify(jsonData, null, 2);
        const preElement = document.getElementById('jsonContent');
        // Plain text first, so there is always readable content.
        preElement.textContent = formattedJson;
        if (window.hljs) {
            try {
                let highlighted;
                try {
                    // Current highlight.js signature: highlight(code, { language }).
                    highlighted = window.hljs.highlight(formattedJson, { language: 'json' });
                } catch (modernApiError) {
                    // Older highlight.js releases only accept highlight(language, code).
                    highlighted = window.hljs.highlight('json', formattedJson);
                }
                preElement.innerHTML = highlighted.value;
            } catch (error) {
                // Highlighting is optional; keep the plain-text rendering.
                console.warn('JSON syntax highlighting failed:', error);
            }
        }
        jsonModal.style.display = 'block';
    }

    /**
     * Download the image at `imgPath` under its original file name. A non-2xx
     * response counts as a failure; temporary DOM and URL resources are always
     * released. Failures are logged and reported via `alert`.
     * @param {string} imgPath - relative URL of the screenshot.
     * @returns {Promise<void>}
     */
    async downloadImage(imgPath) {
        let objectUrl = null;
        let anchor = null;
        try {
            const response = await fetch(imgPath);
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            const blob = await response.blob();
            objectUrl = window.URL.createObjectURL(blob);
            anchor = document.createElement('a');
            anchor.href = objectUrl;
            anchor.download = imgPath.split('/').pop(); // keep the original file name
            document.body.appendChild(anchor);
            anchor.click();
        } catch (error) {
            console.error('下载图片失败:', error);
            alert('下载图片失败，请重试');
        } finally {
            if (anchor && anchor.parentNode) {
                anchor.parentNode.removeChild(anchor);
            }
            if (objectUrl) {
                window.URL.revokeObjectURL(objectUrl);
            }
        }
    }
 }
 // Create the visualizer once the document has been parsed.
 document.addEventListener('DOMContentLoaded', function onDomReady() {
    new PathVisualizer();
 });
--- a/misc/temp_analysis/exam.json
+++ b/misc/temp_analysis/exam.json
--- a/misc/temp_analysis/exam_gen.py
+++ b/misc/temp_analysis/exam_gen.py
@ -0,0 +1,55 @@
 import json
 import os
 import uuid
 def create_exam_cases():
    """Expand every shortest path in process_3.json into per-step exam cases.

    Reads ``temp_analysis/process_3.json`` (a mapping of URL -> entry) and, for
    each entry in ``shortestPathsMeta``, emits one case per step. The parallel
    chain lists are consumed together, so a path yields as many cases as its
    shortest list has items. The cases are written to ``temp_analysis/exam.json``.
    """
    with open('temp_analysis/process_3.json', 'r', encoding='utf-8') as source:
        pages = json.load(source)
    cases = []
    for page in pages.values():
        # The page title doubles as the task description ("question")
        question = page.get('title', '').strip()
        for meta in page.get('shortestPathsMeta', []):
            trajectory_id = str(uuid.uuid4())
            # zip() stops at the shortest list, i.e. only fully described steps
            steps = zip(
                meta.get('chainIDs', []),
                meta.get('chainUrls', []),
                meta.get('chainAxTreeID', []),
                meta.get('chainTexts', []),
            )
            for step_num, (page_id, step_url, ax_tree_id, step_text) in enumerate(steps, start=1):
                cases.append({
                    "num": len(cases) + 1,
                    "id": str(uuid.uuid4()),
                    "trajectory_id": trajectory_id,
                    "trajectory_step_num": step_num,
                    "page_id": page_id,
                    "url": step_url,
                    "question": question,
                    "answer": str(ax_tree_id),
                    "answer_text": step_text,
                })
    # Persist all generated cases
    with open('temp_analysis/exam.json', 'w', encoding='utf-8') as target:
        json.dump(cases, target, ensure_ascii=False, indent=2)
    print(f"已生成 {len(cases)} 个测试用例并保存到 temp_analysis/exam.json")
 # Script entry point: generate the exam cases only when run directly, not on import.
 if __name__ == "__main__":
    create_exam_cases()
--- a/misc/temp_analysis/exam_run_axtree.py
+++ b/misc/temp_analysis/exam_run_axtree.py
@ -0,0 +1,331 @@
 import json
 import os
 import re
 import logging
 import concurrent.futures
 import argparse
 from openai import OpenAI
 from dotenv import load_dotenv
 import datetime
 # Logging: one module logger that writes INFO and above to a log file and the console
 log = logging.getLogger(__name__)
 log.setLevel(logging.INFO)
 # Both handlers share the same line format
 formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 file_handler = logging.FileHandler('temp_analysis/test_run.log')
 console_handler = logging.StreamHandler()
 for _handler in (file_handler, console_handler):
    _handler.setLevel(logging.INFO)
    _handler.setFormatter(formatter)
    log.addHandler(_handler)
 # Load environment variables (from a .env file when present)
 load_dotenv()
 PARALLEL_WORKERS = 4
 MODEL_NAME = os.getenv("MODEL_NAME")
 print(f"MODEL_NAME: {MODEL_NAME}")
 # OpenAI client; the base URL can point at any OpenAI-compatible service
 client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    base_url=os.getenv("OPENAI_API_BASE_URL"),
 )
 # modified version of the task proposer agent prompt from https://arxiv.org/pdf/2502.11357
 # System prompt: defines the action space (click/type/select/scroll/stop) and the
 # required output format, a fenced block holding a JSON object with the keys
 # "action_in_natural_language" and "grounded_action".
 cot_system_prompt = """
 What does this webpage show? Imagine you are a real user on this webpage. Given the webpage
 screenshot or ocr result and parsed HTML/accessibility tree and the task description, please provide 
 the first action towards completing that task.
 Do the following step by step:
 1. Given the webpage screenshot or ocr result and parsed HTML/accessibility tree, generate the first action
 towards completing that task (in natural language form).
 2. Given the webpage screenshot or ocr result, parsed HTML/accessibility tree, and the natural language
 action, generate the grounded version of that action.
 ACTION SPACE: Your action space is: ['click [element ID]', 'type [element ID] [content]',
 'select [element ID] [content of option to select]', 'scroll [up]', 'scroll [down]', and 'stop'].
 Action output should follow the syntax as given below:
 click [element ID]: This action clicks on an element with a specific ID on the webpage.
 type [element ID] [content]: Use this to type the content into the field with id. By default, the
 "Enter" key is pressed after typing. Both the content and the ID should be within square braces
 as per the syntax.
 select [element ID] [content of option to select]: Select an option from a dropdown menu. The
 content of the option to select should be within square braces. When you get (select an option)
 tags from the accessibility tree, you need to select the serial number (element_id) corresponding
 to the select tag, not the option, and select the most likely content corresponding to the option as
 input.
 scroll [down]: Scroll the page down.
 scroll [up]: Scroll the page up.
 IMPORTANT: 
 To be successful, it is important to STRICTLY follow the below rules:
 Action generation rules:
 1. You should generate a single atomic action at each step.
 2. The action should be an atomic action from the given vocabulary - click, type, select, scroll
 (up or down), or stop.
 3. The arguments to each action should be within square braces. For example, "click [127]",
 "type [43] [content to type]", "scroll [up]", "scroll [down]".
 4. The natural language form of action (corresponding to the field "action_in_natural_language")
 should be consistent with the grounded version of the action (corresponding to the field "grounded
 _action"). Do NOT add any additional information in the grounded action. For example, if a
 particular element ID is specified in the grounded action, a description of that element must be
 present in the natural language action.
 5. If the type action is selected, the natural language form of action ("action_in_natural_language") should always specify the actual text to be typed.
 6. You should issue a "stop" action if the current webpage asks to log in or for credit card
 information.
 7. To input text, there is NO need to click the textbox first, directly type content. After typing,
 the system automatically hits the 'ENTER' key.
 8. STRICTLY Avoid repeating the same action (click/type) if the webpage remains unchanged.
 You may have selected the wrong web element.
 9. Do NOT use quotation marks in the action generation.
 OUTPUT FORMAT: 
 Please give a short analysis of the screenshot, parsed
 HTML/accessibility tree, then put your answer within ``` ```, for example,
 "In summary, the proposed task and the corresponding action is: ```{
 "action_in_natural_language": "<ACTION_IN_NATURAL_LANGUAGE>:str",
 "grounded_action": "<ACTION>:str"}```
 """
 # User prompt template, filled with str.format(); the placeholders are
 # INIT_URL, A11Y_TREE, SCREENSHOT and TASK_DESCRIPTION.
 cot_user_prompt = """
 Website URL: {INIT_URL}
 Parsed HTML/Accessibility Tree: {A11Y_TREE}
 Screenshot ocr result: {SCREENSHOT}
 Task description: {TASK_DESCRIPTION}
 """
 def call_api(messages):
    """Send ``messages`` to the chat-completions endpoint using ``MODEL_NAME``.

    Returns the response object, or ``None`` when the call raises (the error
    is logged rather than propagated).
    """
    try:
        request = {"messages": messages, "model": MODEL_NAME}
        return client.chat.completions.create(**request)
    except Exception as exc:
        log.error(f"API调用出错: {exc}")
    return None
 def extract_action(response_text):
    """Extract the action pair from a model reply.

    The reply is expected to contain a fenced block holding a JSON object with
    the keys "action_in_natural_language" and "grounded_action". An optional
    ``json`` language tag after the opening fence is accepted.

    Args:
        response_text: Raw text returned by the model; may be None or empty.

    Returns:
        A tuple ``(action_in_natural_language, grounded_action)``. Both are
        None when the reply is missing, has no fenced JSON object, or the
        object cannot be parsed; a single element is None when its key is
        absent.
    """
    # A reply without text content (e.g. None from the API) has nothing to parse
    if not isinstance(response_text, str) or not response_text:
        return None, None
    # Capture what sits between the braces of the first fenced JSON object
    match = re.search(
        r'```(?:json)?\s*\{\s*(.+?)\s*\}\s*```',
        response_text,
        re.DOTALL | re.IGNORECASE,
    )
    if not match:
        return None, None
    try:
        # Re-wrap the captured body in braces to obtain a complete JSON object
        action_data = json.loads("{" + match.group(1) + "}")
    except json.JSONDecodeError:
        log.error(f"无法解析JSON: {match.group(1)}")
        return None, None
    return action_data.get("action_in_natural_language"), action_data.get("grounded_action")
 def check_answer(grounded_action, answer):
    """Check whether a grounded click action targets one of the expected IDs.

    ``answer`` is a comma-separated string of accepted element IDs. Only
    actions that start with "click" and contain ``click [<digits>]`` can match.
    Any exception raised during the check is logged and treated as a mismatch.
    """
    try:
        log.info(f"grounded_action: {grounded_action}, answer: {answer}")
        if grounded_action and grounded_action.startswith("click"):
            # Pull the numeric element ID out of the action
            found = re.search(r'click \[(\d+)\]', grounded_action)
            if found:
                # The expected answer may list several acceptable IDs
                accepted = {part.strip() for part in answer.split(",")}
                return found.group(1) in accepted
        return False
    except Exception as e:
        log.error(f"检查答案出错: {e}")
        return False
 def process_item(item):
    """Run one exam case against the model and score its first action.

    Reads the accessibility tree from ``axtrees/<page_id>.txt``, asks the model
    for the first action towards the task, and compares the grounded action
    with the expected element ID(s).

    Args:
        item: Exam case dict with the keys "num", "id", "page_id", "url",
            "question", "answer", "answer_text", "trajectory_id" and
            "trajectory_step_num".

    Returns:
        A result dict (including "is_correct"), or None when the accessibility
        tree file is missing or the API call yields no choices.
    """
    item_num = item["num"]
    item_id = item["id"]
    page_id = item["page_id"]
    url = item["url"]
    task_description = item["question"]
    answer = item["answer"]
    answer_text = item["answer_text"]
    trajectory_id = item["trajectory_id"]
    trajectory_step_num = item["trajectory_step_num"]
    log.info(f"处理ID: {item_id}, URL: {url}")
    # Load the accessibility tree; explicit UTF-8 avoids platform-default decoding
    try:
        with open(f"axtrees/{page_id}.txt", "r", encoding="utf-8") as f:
            a11y_tree = f.read()
    except FileNotFoundError:
        log.error(f"找不到文件: axtrees/{page_id}.txt")
        return None
    # Build the chat request
    messages = [
        {"role": "system", "content": cot_system_prompt},
        {"role": "user", "content": cot_user_prompt.format(
            INIT_URL=url,
            A11Y_TREE=a11y_tree,
            SCREENSHOT="",  # no screenshot OCR result is supplied in this mode
            TASK_DESCRIPTION=task_description
        )}
    ]
    log.info(f"task_description: {task_description}")
    log.info(f"answer: {answer}, answer_text: {answer_text}")
    response = call_api(messages)
    if not (response and getattr(response, 'choices', None)):
        log.error(f"API调用失败: {response}")
        return None
    message = response.choices[0].message
    content = message.content
    # The reasoning field is provider-specific and absent for many models, so
    # read it defensively instead of failing the whole item on AttributeError.
    reasoning_attr = "reasoning" if MODEL_NAME == "qwen/qwq-32b:free" else "reasoning_content"
    reasoning_content = getattr(message, reasoning_attr, None)
    log.info(f"reasoning_content: {reasoning_content}")
    log.info(f"content: {content}")
    # content can be None (e.g. an empty completion); parse an empty string then
    action_nl, grounded_action = extract_action(content or "")
    log.info(f"action_nl: {action_nl}, grounded_action: {grounded_action}")
    is_correct = check_answer(grounded_action, answer)
    log.info(f"is_correct: {is_correct}")
    result = {
        "num": item_num,
        "id": item_id,
        "trajectory_id": trajectory_id,
        "trajectory_step_num": trajectory_step_num,
        "page_id": page_id,
        "url": url,
        "task": task_description,
        "expected_answer": answer,
        "thinking": reasoning_content,
        "raw_content": content,
        "action_nl": action_nl,
        "grounded_action": grounded_action,
        "is_correct": is_correct,
        "model": MODEL_NAME,
        "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }
    log.info(f"ID: {item_id}")
    log.info(f"任务: {task_description}")
    log.info(f"动作: {grounded_action}")
    log.info(f"是否正确: {is_correct}")
    log.info("-" * 50)
    return result
 def main():
    """Run every pending exam case and keep temp_analysis/results.json up to date.

    Cases already answered correctly in an existing results.json are kept and
    skipped; all other cases are (re)run on a thread pool. The results file is
    rewritten after each finished case and contains the earlier successes plus
    the results of this run. Failed entries from earlier runs are dropped
    because those cases are executed again.
    """
    results_path = "temp_analysis/results.json"
    with open("temp_analysis/exam.json", "r", encoding="utf-8") as f:
        exam_data = json.load(f)
    if os.path.exists(results_path):
        with open(results_path, "r", encoding="utf-8") as f:
            results_old = json.load(f)
        # Keep only the cases that were already answered correctly
        results = [r for r in results_old if r.get("is_correct", False)]
        log.info(f"已经成功完成的测试项目数: {len(results)}")
    else:
        results = []
        log.info(f"results.json文件不存在，将重新执行所有测试")
    # Set membership keeps the filter linear in the number of cases
    success_ids = {r["id"] for r in results}
    exam_data = [item for item in exam_data if item["id"] not in success_ids]
    total_items = len(exam_data) + len(results)
    completed = len(results)
    success_count = len(results)
    fail_count = 0
    log.info(f"开始测试，需要执行 {total_items} 个任务, 已经成功 {success_count} 个任务")
    with concurrent.futures.ThreadPoolExecutor(max_workers=PARALLEL_WORKERS) as executor:
        future_to_item = {executor.submit(process_item, item): item for item in exam_data}
        # Results are collected and written only by this thread, so the in-memory
        # list is authoritative. Re-reading the file here would bring back stale
        # failed entries and duplicate the cases being retried.
        for future in concurrent.futures.as_completed(future_to_item):
            try:
                result = future.result()
            except Exception as e:
                # One crashing case must not abort the whole run
                failed_item = future_to_item[future]
                log.error(f"处理测试项出错: {failed_item.get('id')}: {e}")
                result = None
            completed += 1
            if result:
                results.append(result)
                if result["is_correct"]:
                    success_count += 1
                else:
                    fail_count += 1
            else:
                fail_count += 1
            progress = (completed / total_items) * 100
            log.info(f"进度: {progress:.2f}% ({completed}/{total_items}) - 成功: {success_count}, 失败: {fail_count}")
            log.info(f"save results to temp_analysis/results.json")
            # Persist after every case so an interrupted run can be resumed
            with open(results_path, "w") as f:
                json.dump(results, f, indent=2)
    accuracy = success_count / total_items if total_items > 0 else 0
    log.info(f"测试完成! 总计: {total_items}题，正确: {success_count}题，错误: {fail_count}题，正确率: {accuracy:.2%}")
 # Script entry point: run the accessibility-tree exam only when executed directly.
 if __name__ == "__main__":
    main()
--- a/misc/temp_analysis/exam_run_vision.log
+++ b/misc/temp_analysis/exam_run_vision.log
--- a/misc/temp_analysis/exam_run_vision.py
+++ b/misc/temp_analysis/exam_run_vision.py
@ -0,0 +1,418 @@
 import json
 import os
 import re
 import logging
 import concurrent.futures
 import argparse
 from openai import OpenAI
 from dotenv import load_dotenv
 import datetime
 import base64
 from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
 # from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 # NOTE(review): assigned after the transformers import above; confirm it takes effect
 os.environ["CUDA_VISIBLE_DEVICES"] = "1,2,3"
 # Logging: one module logger that writes INFO and above to a log file and the console
 log = logging.getLogger(__name__)
 log.setLevel(logging.INFO)
 # Both handlers share the same line format
 formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 file_handler = logging.FileHandler('temp_analysis/exam_run_vision.log')
 console_handler = logging.StreamHandler()
 for _handler in (file_handler, console_handler):
    _handler.setLevel(logging.INFO)
    _handler.setFormatter(formatter)
    log.addHandler(_handler)
 # Load environment variables (from a .env file when present)
 load_dotenv()
 PARALLEL_WORKERS = 1
 MODEL_NAME = os.getenv("MODEL_NAME")
 print(f"MODEL_NAME: {MODEL_NAME}")
 # OpenAI client; the base URL can point at any OpenAI-compatible service
 client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    base_url=os.getenv("OPENAI_API_BASE_URL"),
 )
 # modified version of the task proposer agent prompt from https://arxiv.org/pdf/2502.11357
 # System prompt: defines the action space and the required output format, a fenced
 # block holding a JSON object with the keys "action_in_natural_language" and
 # "grounded_action".
 # NOTE(review): the syntax section below says "click [element text]" while the
 # action-space list and rule 3 still refer to element IDs — confirm which is intended.
 cot_system_prompt = """
 What does this webpage show? Imagine you are a real user on this webpage. Given the webpage
 screenshot or ocr result and parsed HTML/accessibility tree and the task description, please provide 
 the first action towards completing that task.
 Do the following step by step:
 1. Given the webpage screenshot or ocr result and parsed HTML/accessibility tree, generate the first action
 towards completing that task (in natural language form).
 2. Given the webpage screenshot or ocr result, parsed HTML/accessibility tree, and the natural language
 action, generate the grounded version of that action.
 ACTION SPACE: Your action space is: ['click [element ID]', 'type [element ID] [content]',
 'select [element ID] [content of option to select]', 'scroll [up]', 'scroll [down]', and 'stop'].
 Action output should follow the syntax as given below:
 click [element text]: This action clicks on an element with a specific text on the webpage.
 type [element ID] [content]: Use this to type the content into the field with id. By default, the
 "Enter" key is pressed after typing. Both the content and the ID should be within square braces
 as per the syntax.
 select [element ID] [content of option to select]: Select an option from a dropdown menu. The
 content of the option to select should be within square braces. When you get (select an option)
 tags from the accessibility tree, you need to select the serial number (element_id) corresponding
 to the select tag, not the option, and select the most likely content corresponding to the option as
 input.
 scroll [down]: Scroll the page down.
 scroll [up]: Scroll the page up.
 IMPORTANT: 
 To be successful, it is important to STRICTLY follow the below rules:
 Action generation rules:
 1. You should generate a single atomic action at each step.
 2. The action should be an atomic action from the given vocabulary - click, type, select, scroll
 (up or down), or stop.
 3. The arguments to each action should be within square braces. For example, "click [127]",
 "type [43] [content to type]", "scroll [up]", "scroll [down]".
 4. The natural language form of action (corresponding to the field "action_in_natural_language")
 should be consistent with the grounded version of the action (corresponding to the field "grounded
 _action"). Do NOT add any additional information in the grounded action. For example, if a
 particular element ID is specified in the grounded action, a description of that element must be
 present in the natural language action.
 5. If the type action is selected, the natural language form of action ("action_in_natural_language") should always specify the actual text to be typed.
 6. You should issue a "stop" action if the current webpage asks to log in or for credit card
 information.
 7. To input text, there is NO need to click the textbox first, directly type content. After typing,
 the system automatically hits the 'ENTER' key.
 8. STRICTLY Avoid repeating the same action (click/type) if the webpage remains unchanged.
 You may have selected the wrong web element.
 9. Do NOT use quotation marks in the action generation.
 OUTPUT FORMAT: 
 Please give a short analysis of the screenshot, parsed
 HTML/accessibility tree, then put your answer within ``` ```, for example,
 "In summary, the proposed task and the corresponding action is: ```{
 "action_in_natural_language": "<ACTION_IN_NATURAL_LANGUAGE>:str",
 "grounded_action": "<ACTION>:str"}```
 """
 # User prompt template, filled with str.format(); the placeholders are INIT_URL,
 # A11Y_TREE, SCREENSHOT and TASK_DESCRIPTION, and the doubled braces are format escapes.
 # NOTE(review): this is plain text that only looks like a list of content parts, and
 # the f prefix before "file://" is part of the text, not an f-string. Formatting it
 # yields one string, not a structured text+image message — confirm that is intended.
 cot_user_prompt = """
 [
    {{
        "type": "text",
        "text": "Website URL: {INIT_URL}\nParsed HTML/Accessibility Tree: {A11Y_TREE}\nTask description: {TASK_DESCRIPTION}"
    }},
    {{
        "type": "image_url",
        "image_url": {{
            "url": f"file://{SCREENSHOT}"
        }}
    }}
 ]
 """
 def load_qwen_model():
    """Load the local Qwen2.5-VL model and its processor into module globals.

    Sets the globals ``qwen_model`` and ``processor``. Returns True when both
    loaded, or False (after logging the error) when loading raised.
    """
    global qwen_model, processor
    # Local checkpoint directory
    model_path = "/data1/yuyr/Qwen2.5-VL-7B-Instruct"
    load_options = {"torch_dtype": "auto", "device_map": "cuda:1"}
    try:
        log.info("正在加载Qwen2VL模型和处理器...")
        qwen_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model_path, **load_options)
        log.info("Qwen2VL模型加载完成")
        processor = AutoProcessor.from_pretrained(model_path)
        log.info("处理器加载完成")
        log.info("Qwen2VL模型和处理器加载完成")
    except Exception as e:
        log.error(f"Qwen2VL模型加载失败: {e}")
        return False
    else:
        return True
 def call_api(messages):
    """Send ``messages`` to the chat-completions endpoint using ``MODEL_NAME``.

    Logs before and after the call. Returns the response object, or ``None``
    when the call raises (the error is logged rather than propagated).
    """
    try:
        log.info("call llm messages ")
        request = {"messages": messages, "model": MODEL_NAME}
        reply = client.chat.completions.create(**request)
        log.info(f"call llm response: {reply}")
    except Exception as exc:
        log.error(f"API调用出错: {exc}")
        return None
    return reply
 def call_qwen_model(messages):
    """Run local inference with the loaded Qwen model and return the decoded reply.

    Uses the module globals ``processor`` and ``qwen_model``. Returns the
    generated text with the prompt tokens removed, or None when any step
    raises (the error is logged).
    """
    try:
        # Render the chat template and collect the vision inputs
        prompt = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        images, videos = process_vision_info(messages)
        batch = processor(
            text=[prompt],
            images=images,
            videos=videos,
            padding=True,
            return_tensors="pt",
        ).to(qwen_model.device)
        log.info("正在生成模型回复...")
        sequences = qwen_model.generate(**batch, max_new_tokens=1024)
        # Drop the prompt tokens so only newly generated tokens are decoded
        completions = []
        for prompt_ids, full_ids in zip(batch.input_ids, sequences):
            completions.append(full_ids[len(prompt_ids):])
        decoded = processor.batch_decode(
            completions, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )
        output_text = decoded[0]
        log.info(f"模型输出: {output_text}")
        return output_text
    except Exception as e:
        log.error(f"模型推理失败: {e}")
        return None
 def extract_action(response_text):
    """Extract the action pair from a model reply.

    The reply is expected to contain a fenced block holding a JSON object with
    the keys "action_in_natural_language" and "grounded_action". An optional
    ``json`` language tag after the opening fence is accepted.

    Args:
        response_text: Raw text returned by the model; may be None or empty.

    Returns:
        A tuple ``(action_in_natural_language, grounded_action)``. Both are
        None when the reply is missing, has no fenced JSON object, or the
        object cannot be parsed; a single element is None when its key is
        absent.
    """
    # A reply without text content has nothing to parse
    if not isinstance(response_text, str) or not response_text:
        return None, None
    # Capture what sits between the braces of the first fenced JSON object
    match = re.search(
        r'```(?:json)?\s*\{\s*(.+?)\s*\}\s*```',
        response_text,
        re.DOTALL | re.IGNORECASE,
    )
    if not match:
        return None, None
    try:
        # Re-wrap the captured body in braces to obtain a complete JSON object
        action_data = json.loads("{" + match.group(1) + "}")
    except json.JSONDecodeError:
        log.error(f"无法解析JSON: {match.group(1)}")
        return None, None
    return action_data.get("action_in_natural_language"), action_data.get("grounded_action")
 def check_answer(grounded_action, answer):
    """Check whether a grounded click action hits the expected target.

    A numeric click such as ``click [127]`` is compared against the
    comma-separated items of ``answer`` (the original behaviour). When the
    action carries no numeric ID, the bracket content is treated as element
    text and compared against the whole ``answer`` as well as its
    comma-separated items, so text-based clicks can be scored too.

    Args:
        grounded_action: Action string produced by the model, or None.
        answer: Expected element ID(s) or element text.

    Returns:
        True on a match, otherwise False. Non-click actions and any exception
        during the check (which is logged) also yield False.
    """
    try:
        log.info(f"grounded_action: {grounded_action}, answer: {answer}")
        if not grounded_action or not grounded_action.startswith("click"):
            return False
        answer_items = [part.strip() for part in answer.split(",")]
        # Numeric element ID: unchanged comparison against the listed IDs
        id_match = re.search(r'click \[(\d+)\]', grounded_action)
        if id_match:
            return id_match.group(1) in answer_items
        # Otherwise compare the clicked element text
        text_match = re.search(r'click \[(.+?)\]', grounded_action)
        if not text_match:
            return False
        clicked_text = text_match.group(1).strip()
        if not clicked_text:
            return False
        # The expected text may itself contain commas, so also try it unsplit
        return clicked_text == answer.strip() or clicked_text in answer_items
    except Exception as e:
        log.error(f"检查答案出错: {e}")
        return False
 def process_item(item):
    """Run one exam case against the local vision model and score its action.

    Sends the task description together with the page screenshot
    (``screenshots/<page_id>_<page_child_num>.png``) to the model and compares
    the grounded action with the expected element text.

    Args:
        item: Exam case dict with the keys "num", "id", "page_id", "url",
            "task", "expected_answer", "answer_text", "trajectory_id",
            "trajectory_step_num" and "page_child_num".

    Returns:
        A result dict (including "is_correct"), or None when the screenshot is
        missing or the model returned nothing.
    """
    item_num = item["num"]
    item_id = item["id"]
    page_id = item["page_id"]
    url = item["url"]
    task_description = item["task"]
    answer = item["expected_answer"]
    answer_text = item["answer_text"]
    trajectory_id = item["trajectory_id"]
    trajectory_step_num = item["trajectory_step_num"]
    page_child_num = item["page_child_num"]
    log.info(f"处理ID: {item_id}, URL: {url}")
    image_path = f"screenshots/{page_id}_{page_child_num}.png"
    if not os.path.exists(image_path):
        log.error(f"找不到文件: {image_path}")
        return None
    # The user turn must be a list of content parts so that the screenshot is
    # actually handed to the model; a single formatted string would be sent as
    # plain text and the image would never be loaded. No accessibility tree is
    # supplied in vision mode.
    user_content = [
        {
            "type": "text",
            "text": f"Website URL: {url}\nParsed HTML/Accessibility Tree: \nTask description: {task_description}",
        },
        {"type": "image", "image": f"file://{image_path}"},
    ]
    messages = [
        {"role": "system", "content": cot_system_prompt},
        {"role": "user", "content": user_content},
    ]
    log.info(f"task_description: {task_description}")
    log.info(f"answer: {answer}, answer_text: {answer_text}")
    # Local inference returns the decoded text directly (or None on failure)
    response = call_qwen_model(messages)
    print(f"response: {response}")
    if not response:
        log.error(f"API调用失败: {response}")
        return None
    content = response
    # The local model returns plain text only; there is no separate reasoning trace
    reasoning_content = None
    log.info(f"content: {content}")
    action_nl, grounded_action = extract_action(content)
    log.info(f"action_nl: {action_nl}, grounded_action: {grounded_action}")
    # Score against the expected element text
    is_correct = check_answer(grounded_action, answer_text)
    log.info(f"is_correct: {is_correct}")
    result = {
        "num": item_num,
        "id": item_id,
        "trajectory_id": trajectory_id,
        "trajectory_step_num": trajectory_step_num,
        "page_id": page_id,
        "url": url,
        "task": task_description,
        "expected_answer": answer,
        "thinking": reasoning_content,
        "raw_content": content,
        "action_nl": action_nl,
        "grounded_action": grounded_action,
        "is_correct": is_correct,
        "model": MODEL_NAME,
        "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }
    log.info(f"ID: {item_id}")
    log.info(f"任务: {task_description}")
    log.info(f"动作: {grounded_action}")
    log.info(f"是否正确: {is_correct}")
    log.info("-" * 50)
    return result
 def main():
    """Run the vision exam on the local Qwen model and save the results.

    Loads the model, runs every case from ``temp_analysis/test.json`` on a
    thread pool and appends each result to ``temp_analysis/results_vl.json``,
    rewriting the file after every finished case. Entries already present in
    that file are kept. Returns early, without running anything, when the
    model cannot be loaded.
    """
    results_path = "temp_analysis/results_vl.json"
    # Without a model every case would fail, so stop instead of running them all
    if not load_qwen_model():
        log.error("Qwen2VL模型加载失败，终止测试")
        return
    with open("temp_analysis/test.json", "r", encoding="utf-8") as f:
        exam_data = json.load(f)
    results = []
    total_items = len(exam_data) + len(results)
    completed = len(results)
    success_count = len(results)
    fail_count = 0
    log.info(f"开始测试，需要执行 {total_items} 个任务, 已经成功 {success_count} 个任务")
    # Results are collected and written only by this thread, so the existing
    # file is read once up front and then extended in memory.
    try:
        with open(results_path, "r", encoding="utf-8") as f:
            results = json.load(f)
    except FileNotFoundError:
        results = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=PARALLEL_WORKERS) as executor:
        future_to_item = {executor.submit(process_item, item): item for item in exam_data}
        for future in concurrent.futures.as_completed(future_to_item):
            try:
                result = future.result()
            except Exception as e:
                # One crashing case must not abort the whole run
                failed_item = future_to_item[future]
                log.error(f"处理测试项出错: {failed_item.get('id')}: {e}")
                result = None
            completed += 1
            if result:
                results.append(result)
                if result["is_correct"]:
                    success_count += 1
                else:
                    fail_count += 1
            else:
                fail_count += 1
            progress = (completed / total_items) * 100
            log.info(f"进度: {progress:.2f}% ({completed}/{total_items}) - 成功: {success_count}, 失败: {fail_count}")
            log.info(f"save results to temp_analysis/results_vl.json")
            # Persist after every case so partial progress survives an interruption
            with open(results_path, "w") as f:
                json.dump(results, f, indent=2)
    accuracy = success_count / total_items if total_items > 0 else 0
    log.info(f"测试完成! 总计: {total_items}题，正确: {success_count}题，错误: {fail_count}题，正确率: {accuracy:.2%}")
 # Script entry point: run the vision exam only when executed directly.
 if __name__ == "__main__":
    main()
--- a/misc/temp_analysis/merge_process_3.py
+++ b/misc/temp_analysis/merge_process_3.py
@ -0,0 +1,55 @@
 import json
 import os
 def merge_json_files():
    """Merge processed_3.json with the titles/raw results of the analysis file.

    For every key present in both inputs, the processed_3 entry is copied to
    the output and enriched with ``raw_result`` and ``title`` taken from the
    first ``shortestPathsMeta`` element of the analysis file. An entry is
    skipped when it has no path metadata, or when the ``chainAxTreeID`` list of
    its first path contains a None. Output keys follow the key order of
    processed_3.json, so repeated runs produce the same file.
    """
    # Input and output locations
    process_3_path = "path/processed_3.json"
    processed_with_analysis_path = "backup/v17_2/path/processed_3_with_analysis_20250324163759.json"
    output_path = "temp_analysis/process_3.json"
    with open(process_3_path, 'r', encoding='utf-8') as f:
        process_3_data = json.load(f)
    with open(processed_with_analysis_path, 'r', encoding='utf-8') as f:
        processed_with_analysis_data = json.load(f)
    # Keys present in both files, in processed_3.json order (a set has no stable order)
    common_keys = [key for key in process_3_data if key in processed_with_analysis_data]
    merged_data = {}
    for key in common_keys:
        base_paths = process_3_data[key].get('shortestPathsMeta') or []
        # Without a path there is nothing to validate or examine; skip, don't crash
        if not base_paths:
            continue
        # A None accessibility-tree ID marks a step that cannot be referenced
        if any(ax_tree_id is None for ax_tree_id in base_paths[0]['chainAxTreeID']):
            continue
        # Use the processed_3 entry as the base
        merged_data[key] = process_3_data[key]
        # Copy raw_result and title from the analysis file when available
        analysis_paths = processed_with_analysis_data[key].get('shortestPathsMeta') or []
        analysis_meta = analysis_paths[0] if analysis_paths else {}
        if 'raw_result' in analysis_meta:
            merged_data[key]['raw_result'] = analysis_meta['raw_result']
        if 'title' in analysis_meta:
            merged_data[key]['title'] = analysis_meta['title'].strip()
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(merged_data, f, ensure_ascii=False, indent=2)
    print(f"合并完成！共处理了 {len(common_keys)} 个键。")
    print(f"结果已保存到 {output_path}")
 # Script entry point: merge the JSON files only when executed directly.
 if __name__ == "__main__":
    merge_json_files()
--- a/misc/temp_analysis/mix.txt
+++ b/misc/temp_analysis/mix.txt
@ -0,0 +1,47 @@
 "Check for alerts indicating service performance exceeds 100 on Grafana"
 "Check the active notifications for any alerts related to the performance of your Kubernetes deployment and view the corresponding alert rules to ensure you can address any issues promptly on Grafana."
 "Check the active notifications for any alerts related to the performance of your Kubernetes deployment and view the corresponding alert rules to ensure you can address any issues promptly on Grafana."
 "Check the alert history and analyze the performance metrics of the Random Multiple Series dashboard on the Grafana website."
 "Check the alert history for discrepancies in website metrics and view alerts indicating if there are three times more page views than users on Grafana's monitoring dashboard on task website."
 "Check the alert history for the Random Single Serie alert to analyze its performance over time on Grafana"
 "Check the available flowcharting visualization examples, specifically focusing on the gradient color mode, to determine which one best fits your project needs on Grafana Play"
 "Check the current alerts for the do-nyc1-demo-infra Kubernetes cluster on the Grafana website"
 "Check the current alerts for the loadgen workload in the do-nyc1-demo-infra namespace to ensure there are no critical issues on the Grafana monitoring dashboard."
 "Check the historical alert notifications for a specific service and analyze the performance over time on the Grafana dashboard"
 "Check the historical performance metrics of a service using multiple data series on Grafana Play"
 "Check the history of alerts and review the details of the WW Alert on Grafana"
 "Check the history of alerts and review the details of the WW Alert on Grafana"
 "Check the history of alerts and test the alert rule for sun conditions on Grafana"
 "Check the history of alerts and test the alert rule for sun conditions on Grafana"
 "Check the performance and status of synthetic monitoring checks for uptime and latency on Grafana"
 "Check the performance and status of synthetic monitoring checks for uptime and latency on Grafana"
 "Check the performance metrics of application services on Grafana"
 "Check the performance metrics of services to ensure they have 100% uptime and latency under 500ms on Grafana"
 "Check the performance metrics of synthetic monitoring checks for a specific website and view the associated dashboard on Grafana Play"
 "Check the performance metrics of synthetic monitoring checks for a specific website and view the associated dashboard on Grafana Play"
 "Check the performance metrics of the AMQP service in the application monitoring dashboard on Grafana"
 "Check the performance metrics of the AMQP service in the application monitoring dashboard on Grafana"
 "Check the performance metrics of the faro-shop-worker service to ensure it is running optimally on the Grafana observability platform"
 "Check the performance metrics of the Grafana Home Page and ensure it meets the required uptime and response time standards for your website monitoring needs on Grafana Synthetic Monitoring."
 "Check the performance metrics of the Grafana Home Page and ensure its uptime and response time are within acceptable limits on Grafana's synthetic monitoring dashboard"
 "Check the performance metrics of the Grafana Home Page and Grafana Ping Check to ensure uptime is 100% and latency is under 500ms on Grafana's synthetic monitoring platform"
 "Check the performance metrics of the Grafana Home Page service on Grafana Play"
 "Check the performance metrics of the Grafana Home Page service to ensure its uptime and latency are within acceptable limits on Grafana Play"
 "Check the performance metrics of the Grafana website, including uptime and latency, to ensure it meets your requirements for a reliable monitoring service on the Grafana Synthetic Monitoring application."
 "Check the performance metrics of the website monitoring checks to ensure uptime and low latency on Grafana Synthetic Monitoring"
 "Check the performance of services in the application to identify any issues on the Grafana observability app"
 "Check the performance of the faro-shop-frontend service on the Grafana application"
 "Explore and view the flowcharting options demo to understand how to create flowcharts for data visualization on Grafana Play"
 "Explore Grafana's data visualization tools and features for monitoring cloud services on grafana.com"
 "Find and explore flowchart animation examples for dashboard creation on Grafana Play"
 "In summary, the answer is: Check the performance metrics of the faro-shop-backend service to ensure it is running smoothly on the Grafana monitoring dashboard"
 "Reset my password to access my Grafana account and manage my data visualizations on the Grafana website"
 "Reset my password to access my Grafana account and manage my data visualizations on the Grafana website"
 "Set up alert rules based on example dashboards on Grafana Play"
 "View a detailed example of a flowcharting rack diagram to understand its features and functionality on Grafana Play"
 "View a flowcharting example of technical architecture to understand its visualization in Grafana on Grafana Play"
 "View a flowcharting floorplan example for business metrics on Grafana Play"
 "View and compare different flowcharting network diagram examples to understand their features and functionalities on Grafana Play"
 "View and compare the performance metrics of different synthetic monitoring checks for the Grafana service, focusing on uptime and latency, to ensure optimal service availability on play.grafana.org"
 "View examples of hierarchical state level flowcharting capabilities on Grafana"
 "View performance metrics with sparklines to identify trends and make informed decisions on Grafana"
--- a/misc/temp_analysis/process_3.json
+++ b/misc/temp_analysis/process_3.json
--- a/misc/temp_analysis/r1.txt
+++ b/misc/temp_analysis/r1.txt
@ -0,0 +1,47 @@
 "Check for alerts indicating service performance exceeds 100 on Grafana"
 "Check the active notifications for any alerts related to the performance of your Kubernetes deployment and view the corresponding alert rules to ensure you can address any issues promptly on Grafana."
 "Check the active notifications for any alerts related to the performance of your Kubernetes deployment and view the corresponding alert rules to ensure you can address any issues promptly on Grafana."
 "Check the alert history and analyze the performance metrics of the Random Multiple Series dashboard on the Grafana website."
 "Check the alert history and create a new alert rule to monitor specific events on Grafana"
 "Check the alert history for discrepancies in website metrics and view alerts indicating if there are three times more page views than users on Grafana's monitoring dashboard on task website."
 "Check the alert history for the Random Single Serie alert to analyze its performance over time on Grafana"
 "Check the available flowcharting visualization examples, specifically focusing on the gradient color mode, to determine which one best fits your project needs on Grafana Play"
 "Check the current alerts for the do-nyc1-demo-infra Kubernetes cluster on the Grafana website"
 "Check the current alerts for the loadgen workload in the do-nyc1-demo-infra namespace to ensure there are no critical issues on the Grafana monitoring dashboard."
 "Check the historical alert notifications for a specific service and analyze the performance over time on the Grafana dashboard"
 "Check the historical performance metrics of a service using multiple data series on Grafana Play"
 "Check the history of alerts and review the details of the WW Alert on Grafana"
 "Check the history of alerts and review the details of the WW Alert on Grafana"
 "Check the history of alerts and test the alert rule for sun conditions on Grafana"
 "Check the history of alerts and test the alert rule for sun conditions on Grafana"
 "Check the performance and status of synthetic monitoring checks for uptime and latency on Grafana"
 "Check the performance and status of synthetic monitoring checks for uptime and latency on Grafana"
 "Check the performance metrics of application services on Grafana"
 "Check the performance metrics of synthetic monitoring checks for a specific website and view the associated dashboard on Grafana Play"
 "Check the performance metrics of synthetic monitoring checks for a specific website and view the associated dashboard on Grafana Play"
 "Check the performance metrics of the AMQP service in the application monitoring dashboard on Grafana"
 "Check the performance metrics of the AMQP service in the application monitoring dashboard on Grafana"
 "Check the performance metrics of the faro-shop-worker service to ensure it is running optimally on the Grafana observability platform"
 "Check the performance metrics of the Grafana Home Page and ensure it meets the required uptime and response time standards for your website monitoring needs on Grafana Synthetic Monitoring."
 "Check the performance metrics of the Grafana Home Page and ensure its uptime and response time are within acceptable limits on Grafana's synthetic monitoring dashboard"
 "Check the performance metrics of the Grafana Home Page service on Grafana Play"
 "Check the performance metrics of the Grafana Home Page service to ensure its uptime and latency are within acceptable limits on Grafana Play"
 "Check the performance metrics of the Grafana website, including uptime and latency, to ensure it meets your requirements for a reliable monitoring service on the Grafana Synthetic Monitoring application."
 "Check the performance metrics of the website monitoring checks to ensure uptime and low latency on Grafana Synthetic Monitoring"
 "Check the performance of services in the application to identify any issues on the Grafana observability app"
 "Check the performance of the faro-shop-frontend service on the Grafana application"
 "Explore and view the flowcharting options demo to understand how to create flowcharts for data visualization on Grafana Play"
 "Explore Grafana's data visualization tools and features for monitoring cloud services on grafana.com"
 "Find and explore flowchart animation examples for dashboard creation on Grafana Play"
 "In summary, the answer is: Check the performance metrics of the faro-shop-backend service to ensure it is running smoothly on the Grafana monitoring dashboard"
 "Reset my password to access my Grafana account and manage my data visualizations on the Grafana website"
 "Reset my password to access my Grafana account and manage my data visualizations on the Grafana website"
 "Set up alert rules based on example dashboards on Grafana Play"
 "View a detailed example of a flowcharting rack diagram to understand its features and functionality on Grafana Play"
 "View a flowcharting example of technical architecture to understand its visualization in Grafana on Grafana Play"
 "View a flowcharting floorplan example for business metrics on Grafana Play"
 "View and compare different flowcharting network diagram examples to understand their features and functionalities on Grafana Play"
 "View and compare the performance metrics of different synthetic monitoring checks for the Grafana service, focusing on uptime and latency, to ensure optimal service availability on play.grafana.org"
 "View examples of hierarchical state level flowcharting capabilities on Grafana"
 "View performance metrics with sparklines to identify trends and make informed decisions on Grafana"
 "View performance metrics with sparklines to identify trends and make informed decisions on Grafana"
--- a/misc/temp_analysis/result_clean.py
+++ b/misc/temp_analysis/result_clean.py
@ -0,0 +1,60 @@
 import json
 import random
 from pathlib import Path
 # Load the evaluation results.
 with open('temp_analysis/results.json', 'r', encoding='utf-8') as f:
    results = json.load(f)
 # Keep only the entries whose is_correct flag is exactly True.
 correct_results = [item for item in results if item.get('is_correct') is True]
 print(f"找到 {len(correct_results)} 个正确的条目")
 # Load the exam items and index them by id.
 with open('temp_analysis/exam.json', 'r', encoding='utf-8') as f:
    exam_data = json.load(f)
 exam_map = {item.get('id'): item for item in exam_data}
 # Attach the answer_text of the matching exam item to every correct result.
 for result in correct_results:
    result_id = result.get('id')
    if result_id in exam_map:
        result['answer_text'] = exam_map[result_id].get('answer_text', '')
    else:
        print(f"警告: ID {result_id} 在 exam.json 中未找到")
 # Load the merged trajectories and index them by title.
 with open('temp_analysis/process_3.json', 'r', encoding='utf-8') as f:
    trajectory_data = json.load(f)
 trajectory_map = {item.get('title'): item for item in trajectory_data.values()}
 # Look up chainChildNum for the step each correct result belongs to.
 for result in correct_results:
    task = result.get('task')
    if task not in trajectory_map:
        print(f"警告: {task} 在 trajectory_map 中未找到")
        continue
    step_num = result.get('trajectory_step_num')
    child_nums = trajectory_map[task]['shortestPathsMeta'][0]['chainChildNum']
    # trajectory_step_num is used as a 1-based index. Warn and skip when it is
    # missing or out of range instead of crashing on None - 1 or silently
    # wrapping around to the last element via index -1.
    if not isinstance(step_num, int) or not 1 <= step_num <= len(child_nums):
        print(f"警告: {task} 的 trajectory_step_num 无效: {step_num}")
        continue
    result['page_child_num'] = child_nums[step_num - 1]
 # Shuffle with a fixed seed so the split is reproducible.
 random.seed(42)
 random.shuffle(correct_results)
 # Split into train and test sets at a 7:3 ratio.
 split_index = int(len(correct_results) * 0.7)
 train_data = correct_results[:split_index]
 test_data = correct_results[split_index:]
 print(f"训练集大小: {len(train_data)}")
 print(f"测试集大小: {len(test_data)}")
 # Save both sets.
 with open('temp_analysis/train.json', 'w', encoding='utf-8') as f:
    json.dump(train_data, f, ensure_ascii=False, indent=2)
 with open('temp_analysis/test.json', 'w', encoding='utf-8') as f:
    json.dump(test_data, f, ensure_ascii=False, indent=2)
 print("处理完成! 数据已保存到 temp_analysis/train.json 和 temp_analysis/test.json")
--- a/misc/temp_analysis/results
+++ b/misc/temp_analysis/results
--- a/misc/temp_analysis/results.json
+++ b/misc/temp_analysis/results.json
--- a/misc/temp_analysis/results_50_success.json
+++ b/misc/temp_analysis/results_50_success.json
--- a/misc/temp_analysis/results_correct.json
+++ b/misc/temp_analysis/results_correct.json
--- a/misc/temp_analysis/results_qwq.json
+++ b/misc/temp_analysis/results_qwq.json
--- a/misc/temp_analysis/results_vl.json
+++ b/misc/temp_analysis/results_vl.json
@ -0,0 +1 @@
 []
--- a/misc/temp_analysis/test.json
+++ b/misc/temp_analysis/test.json
--- a/misc/temp_analysis/test_run
+++ b/misc/temp_analysis/test_run
@ -0,0 +1,243 @@
 import json
 import os
 import re
 import logging
 from openai import OpenAI
 from dotenv import load_dotenv
 # Logging setup
 # Create the module logger
 log = logging.getLogger(__name__)
 log.setLevel(logging.INFO)
 # Create the file handler (writes to temp_analysis/test_run.log)
 file_handler = logging.FileHandler('temp_analysis/test_run.log')
 file_handler.setLevel(logging.INFO)
 # Create the console handler
 console_handler = logging.StreamHandler()
 console_handler.setLevel(logging.INFO)
 # Create the formatter and attach it to both handlers
 formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 file_handler.setFormatter(formatter)
 console_handler.setFormatter(formatter)
 # Attach the handlers to the logger
 log.addHandler(file_handler)
 log.addHandler(console_handler)
 # Load environment variables
 load_dotenv()
 # Configure the OpenAI client from the environment
 client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    base_url=os.getenv("OPENAI_API_BASE_URL")  # Set a base URL when using another compatible service
 )
 # modified version of the task proposer agent prompt from https://arxiv.org/pdf/2502.11357
 # System prompt
 cot_system_prompt = """
 What does this webpage show? Imagine you are a real user on this webpage. Given the webpage
 screenshot or ocr result and parsed HTML/accessibility tree and the task description, please provide 
 the first action towards completing that task.
 Do the following step by step:
 1. Given the webpage screenshot or ocr result and parsed HTML/accessibility tree, generate the first action
 towards completing that task (in natural language form).
 2. Given the webpage screenshot or ocr result, parsed HTML/accessibility tree, and the natural language
 action, generate the grounded version of that action.
 ACTION SPACE: Your action space is: ['click [element ID]', 'type [element ID] [content]',
 'select [element ID] [content of option to select]', 'scroll [up]', 'scroll [down]', and 'stop'].
 Action output should follow the syntax as given below:
 click [element ID]: This action clicks on an element with a specific ID on the webpage.
 type [element ID] [content]: Use this to type the content into the field with id. By default, the
 "Enter" key is pressed after typing. Both the content and the ID should be within square braces
 as per the syntax.
 select [element ID] [content of option to select]: Select an option from a dropdown menu. The
 content of the option to select should be within square braces. When you get (select an option)
 tags from the accessibility tree, you need to select the serial number (element_id) corresponding
 to the select tag, not the option, and select the most likely content corresponding to the option as
 input.
 scroll [down]: Scroll the page down.
 scroll [up]: Scroll the page up.
 IMPORTANT: 
 To be successful, it is important to STRICTLY follow the below rules:
 Action generation rules:
 1. You should generate a single atomic action at each step.
 2. The action should be an atomic action from the given vocabulary - click, type, select, scroll
 (up or down), or stop.
 3. The arguments to each action should be within square braces. For example, "click [127]",
 "type [43] [content to type]", "scroll [up]", "scroll [down]".
 4. The natural language form of action (corresponding to the field "action_in_natural_language")
 should be consistent with the grounded version of the action (corresponding to the field "grounded
 _action"). Do NOT add any additional information in the grounded action. For example, if a
 particular element ID is specified in the grounded action, a description of that element must be
 present in the natural language action.
 5. If the type action is selected, the natural language form of action ("action_in_natural_language") should always specify the actual text to be typed.
 6. You should issue a "stop" action if the current webpage asks to log in or for credit card
 information.
 7. To input text, there is NO need to click the textbox first, directly type content. After typing,
 the system automatically hits the 'ENTER' key.
 8. STRICTLY Avoid repeating the same action (click/type) if the webpage remains unchanged.
 You may have selected the wrong web element.
 9. Do NOT use quotation marks in the action generation.
 OUTPUT FORMAT: 
 Please give a short analysis of the screenshot, parsed
 HTML/accessibility tree, then put your answer within ``` ```, for example,
 "In summary, the proposed task and the corresponding action is: ```{
 "action_in_natural_language":<ACTION_IN_NATURAL_LANGUAGE>:str,
 "grounded_action": <ACTION>:str}"```
 """
 # User prompt
 cot_user_prompt = """
 Website URL: {INIT_URL}
 Parsed HTML/Accessibility Tree: {A11Y_TREE}
 Screenshot ocr result: {SCREENSHOT}
 Task description: {TASK_DESCRIPTION}
 """
 def call_api(messages, model="gpt-4o-mini"):
    """Send a chat-completion request through the module-level OpenAI client.

    Args:
        messages: Chat messages to send (list of role/content dicts).
        model: Model name passed to the API. Defaults to the model this
            script was written against, so existing callers are unaffected.

    Returns:
        The completion response, or ``None`` when the call raised. The error
        is logged instead of propagated; the caller treats ``None`` as a
        failed item and moves on.
    """
    try:
        return client.chat.completions.create(messages=messages, model=model)
    except Exception as e:
        log.error(f"API调用出错: {e}")
        return None
 def extract_action(response_text):
    """Extract the natural-language and grounded action from a model reply.

    The reply is expected to contain a JSON object inside a triple-backtick
    fence. Besides the compact form (fence, object, fence with nothing in
    between), this also accepts a language tag after the opening fence (for
    example "json"), whitespace or newlines around the object, and a stray
    double quote before the closing fence, as shown in the example embedded
    in the system prompt.

    Args:
        response_text: Text content of the model reply; may be ``None`` or
            empty.

    Returns:
        A tuple ``(action_in_natural_language, grounded_action)``. An element
        is ``None`` when its key is missing; both are ``None`` when no fenced
        JSON object is found or the object cannot be parsed.
    """
    if not response_text:
        return None, None
    match = re.search(r'```[a-zA-Z]*\s*(\{.*?\})\s*"?\s*```', response_text, re.DOTALL)
    if not match:
        return None, None
    try:
        action_data = json.loads(match.group(1))
    except json.JSONDecodeError:
        log.error(f"无法解析JSON: {match.group(1)}")
        return None, None
    return action_data.get("action_in_natural_language"), action_data.get("grounded_action")
 def check_answer(grounded_action, answer):
    """Check whether a grounded click action targets an accepted element ID.

    Args:
        grounded_action: Action string such as ``click [12]``. Anything that
            is not a string starting with ``click`` (after trimming
            surrounding whitespace) is scored as incorrect.
        answer: Accepted element IDs, given as a comma-separated string, a
            single number, or a list/tuple of IDs.

    Returns:
        ``True`` when the clicked element ID is one of the accepted IDs,
        otherwise ``False``.
    """
    if not isinstance(grounded_action, str) or answer is None:
        return False
    action = grounded_action.strip()
    if not action.startswith("click"):
        return False
    # Pull the numeric element ID out of the click action.
    match = re.search(r'click \[(\d+)\]', action)
    if not match:
        return False
    # Normalize every accepted ID to a trimmed string so JSON numbers and
    # lists compare the same way as the comma-separated string form.
    if isinstance(answer, (list, tuple)):
        accepted_ids = {str(part).strip() for part in answer}
    else:
        accepted_ids = {part.strip() for part in str(answer).split(",")}
    return match.group(1) in accepted_ids
 def main():
    """Run every exam item through the model and score its first action.

    Reads ``temp_analysis/exam.json``, loads the accessibility tree of each
    item from ``axtrees/<id>.txt``, queries the model and writes the per-item
    results to ``temp_analysis/results.json``. Items whose tree file is
    missing or whose API call fails are logged and left out of the results,
    so the reported accuracy covers answered items only.
    """
    # Open text files as UTF-8 explicitly; the platform default encoding is
    # not guaranteed to be UTF-8.
    with open("temp_analysis/exam.json", "r", encoding="utf-8") as f:
        exam_data = json.load(f)
    results = []
    for item in exam_data:
        item_id = item["id"]
        url = item["url"]
        task_description = item["question"]
        answer = item["answer"]
        answer_text = item["answer_text"]
        log.info(f"处理ID: {item_id}, URL: {url}")
        # Load the accessibility tree stored for this item.
        try:
            with open(f"axtrees/{item_id}.txt", "r", encoding="utf-8") as f:
                a11y_tree = f.read()
        except FileNotFoundError:
            log.error(f"找不到文件: axtrees/{item_id}.txt")
            continue
        # Build the chat request; no screenshot OCR text is available here.
        messages = [
            {"role": "system", "content": cot_system_prompt},
            {"role": "user", "content": cot_user_prompt.format(
                INIT_URL=url,
                A11Y_TREE=a11y_tree,
                SCREENSHOT="",
                TASK_DESCRIPTION=task_description
            )}
        ]
        log.info(f"task_description: {task_description}")
        log.info(f"answer: {answer}, answer_text: {answer_text}")
        response = call_api(messages)
        if response and hasattr(response, 'choices') and len(response.choices) > 0:
            # The message content may be None; normalize it to "" so the
            # extraction step reports "no action" instead of raising.
            content = response.choices[0].message.content or ""
            log.info(f"content: {content}")
            action_nl, grounded_action = extract_action(content)
            is_correct = check_answer(grounded_action, answer)
            result = {
                "id": item_id,
                "url": url,
                "task": task_description,
                "action_nl": action_nl,
                "grounded_action": grounded_action,
                "expected_answer": answer,
                "is_correct": is_correct
            }
            results.append(result)
            log.info(f"ID: {item_id}")
            log.info(f"任务: {task_description}")
            log.info(f"动作: {grounded_action}")
            log.info(f"是否正确: {is_correct}")
            log.info("-" * 50)
        else:
            log.error(f"API调用失败: {response}")
    # Persist the per-item results.
    with open("temp_analysis/results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    # Accuracy over the items that produced a result.
    correct_count = sum(1 for r in results if r["is_correct"])
    total_count = len(results)
    accuracy = correct_count / total_count if total_count > 0 else 0
    log.info(f"总计: {total_count}题，正确: {correct_count}题，正确率: {accuracy:.2%}")
 # Run the evaluation only when this file is executed directly as a script.
 if __name__ == "__main__":
    main()
--- a/misc/temp_analysis/test_run
+++ b/misc/temp_analysis/test_run
@ -0,0 +1,276 @@
 import json
 import os
 import re
 import logging
 import concurrent.futures
 from openai import OpenAI
 from dotenv import load_dotenv
 # Logging setup
 # Create the module logger
 log = logging.getLogger(__name__)
 log.setLevel(logging.INFO)
 # Create the file handler (writes to temp_analysis/test_run.log)
 file_handler = logging.FileHandler('temp_analysis/test_run.log')
 file_handler.setLevel(logging.INFO)
 # Create the console handler
 console_handler = logging.StreamHandler()
 console_handler.setLevel(logging.INFO)
 # Create the formatter and attach it to both handlers
 formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 file_handler.setFormatter(formatter)
 console_handler.setFormatter(formatter)
 # Attach the handlers to the logger
 log.addHandler(file_handler)
 log.addHandler(console_handler)
 # Load environment variables
 load_dotenv()
 # Configure the OpenAI client from the environment
 client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    base_url=os.getenv("OPENAI_API_BASE_URL")  # Set a base URL when using another compatible service
 )
 # modified version of the task proposer agent prompt from https://arxiv.org/pdf/2502.11357
 # System prompt
 cot_system_prompt = """
 What does this webpage show? Imagine you are a real user on this webpage. Given the webpage
 screenshot or ocr result and parsed HTML/accessibility tree and the task description, please provide 
 the first action towards completing that task.
 Do the following step by step:
 1. Given the webpage screenshot or ocr result and parsed HTML/accessibility tree, generate the first action
 towards completing that task (in natural language form).
 2. Given the webpage screenshot or ocr result, parsed HTML/accessibility tree, and the natural language
 action, generate the grounded version of that action.
 ACTION SPACE: Your action space is: ['click [element ID]', 'type [element ID] [content]',
 'select [element ID] [content of option to select]', 'scroll [up]', 'scroll [down]', and 'stop'].
 Action output should follow the syntax as given below:
 click [element ID]: This action clicks on an element with a specific ID on the webpage.
 type [element ID] [content]: Use this to type the content into the field with id. By default, the
 "Enter" key is pressed after typing. Both the content and the ID should be within square braces
 as per the syntax.
 select [element ID] [content of option to select]: Select an option from a dropdown menu. The
 content of the option to select should be within square braces. When you get (select an option)
 tags from the accessibility tree, you need to select the serial number (element_id) corresponding
 to the select tag, not the option, and select the most likely content corresponding to the option as
 input.
 scroll [down]: Scroll the page down.
 scroll [up]: Scroll the page up.
 IMPORTANT: 
 To be successful, it is important to STRICTLY follow the below rules:
 Action generation rules:
 1. You should generate a single atomic action at each step.
 2. The action should be an atomic action from the given vocabulary - click, type, select, scroll
 (up or down), or stop.
 3. The arguments to each action should be within square braces. For example, "click [127]",
 "type [43] [content to type]", "scroll [up]", "scroll [down]".
 4. The natural language form of action (corresponding to the field "action_in_natural_language")
 should be consistent with the grounded version of the action (corresponding to the field "grounded
 _action"). Do NOT add any additional information in the grounded action. For example, if a
 particular element ID is specified in the grounded action, a description of that element must be
 present in the natural language action.
 5. If the type action is selected, the natural language form of action ("action_in_natural_language") should always specify the actual text to be typed.
 6. You should issue a "stop" action if the current webpage asks to log in or for credit card
 information.
 7. To input text, there is NO need to click the textbox first, directly type content. After typing,
 the system automatically hits the 'ENTER' key.
 8. STRICTLY Avoid repeating the same action (click/type) if the webpage remains unchanged.
 You may have selected the wrong web element.
 9. Do NOT use quotation marks in the action generation.
 OUTPUT FORMAT: 
 Please give a short analysis of the screenshot, parsed
 HTML/accessibility tree, then put your answer within ``` ```, for example,
 "In summary, the proposed task and the corresponding action is: ```{
 "action_in_natural_language":<ACTION_IN_NATURAL_LANGUAGE>:str,
 "grounded_action": <ACTION>:str}"```
 """
 # User prompt
 cot_user_prompt = """
 Website URL: {INIT_URL}
 Parsed HTML/Accessibility Tree: {A11Y_TREE}
 Screenshot ocr result: {SCREENSHOT}
 Task description: {TASK_DESCRIPTION}
 """
 def call_api(messages, model="aiproxy/deepseek-reasoner"):
    """Send a chat-completion request through the module-level OpenAI client.

    Args:
        messages: Chat messages to send (list of role/content dicts).
        model: Model name passed to the API. Defaults to the model this
            script was written against, so existing callers are unaffected.

    Returns:
        The completion response, or ``None`` when the call raised. The error
        is logged instead of propagated; the caller treats ``None`` as a
        failed item.
    """
    try:
        return client.chat.completions.create(messages=messages, model=model)
    except Exception as e:
        log.error(f"API调用出错: {e}")
        return None
 def extract_action(response_text):
    """Extract the natural-language and grounded action from a model reply.

    The reply is expected to contain a JSON object inside a triple-backtick
    fence. Besides the compact form (fence, object, fence with nothing in
    between), this also accepts a language tag after the opening fence (for
    example "json"), whitespace or newlines around the object, and a stray
    double quote before the closing fence, as shown in the example embedded
    in the system prompt.

    Args:
        response_text: Text content of the model reply; may be ``None`` or
            empty.

    Returns:
        A tuple ``(action_in_natural_language, grounded_action)``. An element
        is ``None`` when its key is missing; both are ``None`` when no fenced
        JSON object is found or the object cannot be parsed.
    """
    if not response_text:
        return None, None
    match = re.search(r'```[a-zA-Z]*\s*(\{.*?\})\s*"?\s*```', response_text, re.DOTALL)
    if not match:
        return None, None
    try:
        action_data = json.loads(match.group(1))
    except json.JSONDecodeError:
        log.error(f"无法解析JSON: {match.group(1)}")
        return None, None
    return action_data.get("action_in_natural_language"), action_data.get("grounded_action")
 def check_answer(grounded_action, answer):
    """Check whether a grounded click action targets an accepted element ID.

    Args:
        grounded_action: Action string such as ``click [12]``. Anything that
            is not a string starting with ``click`` (after trimming
            surrounding whitespace) is scored as incorrect.
        answer: Accepted element IDs, given as a comma-separated string, a
            single number, or a list/tuple of IDs.

    Returns:
        ``True`` when the clicked element ID is one of the accepted IDs,
        otherwise ``False``.
    """
    if not isinstance(grounded_action, str) or answer is None:
        return False
    action = grounded_action.strip()
    if not action.startswith("click"):
        return False
    # Pull the numeric element ID out of the click action.
    match = re.search(r'click \[(\d+)\]', action)
    if not match:
        return False
    # Normalize every accepted ID to a trimmed string so JSON numbers and
    # lists compare the same way as the comma-separated string form.
    if isinstance(answer, (list, tuple)):
        accepted_ids = {str(part).strip() for part in answer}
    else:
        accepted_ids = {part.strip() for part in str(answer).split(",")}
    return match.group(1) in accepted_ids
 def process_item(item):
    """Evaluate a single exam item: query the model and score its action.

    Args:
        item: Exam entry providing the keys ``id``, ``url``, ``question``,
            ``answer`` and ``answer_text``.

    Returns:
        A result dict (id, url, task, expected answer, model reasoning,
        extracted actions and ``is_correct``), or ``None`` when the item's
        accessibility-tree file is missing or the API call failed.
    """
    item_id = item["id"]
    url = item["url"]
    task_description = item["question"]
    answer = item["answer"]
    answer_text = item["answer_text"]
    log.info(f"处理ID: {item_id}, URL: {url}")
    # Load the accessibility tree stored for this item. Read it as UTF-8
    # explicitly; the platform default encoding is not guaranteed to be UTF-8.
    try:
        with open(f"axtrees/{item_id}.txt", "r", encoding="utf-8") as f:
            a11y_tree = f.read()
    except FileNotFoundError:
        log.error(f"找不到文件: axtrees/{item_id}.txt")
        return None
    # Build the chat request; no screenshot OCR text is available here.
    messages = [
        {"role": "system", "content": cot_system_prompt},
        {"role": "user", "content": cot_user_prompt.format(
            INIT_URL=url,
            A11Y_TREE=a11y_tree,
            SCREENSHOT="",
            TASK_DESCRIPTION=task_description
        )}
    ]
    log.info(f"task_description: {task_description}")
    log.info(f"answer: {answer}, answer_text: {answer_text}")
    response = call_api(messages)
    if not (response and hasattr(response, 'choices') and len(response.choices) > 0):
        log.error(f"API调用失败: {response}")
        return None
    message = response.choices[0].message
    # The message content may be None; normalize it to "" so the extraction
    # step reports "no action" instead of raising.
    content = message.content or ""
    # reasoning_content is only present on replies from reasoning models;
    # read it defensively so other models do not raise AttributeError here.
    reasoning_content = getattr(message, "reasoning_content", None)
    log.info(f"reasoning_content: {reasoning_content}")
    log.info(f"content: {content}")
    action_nl, grounded_action = extract_action(content)
    is_correct = check_answer(grounded_action, answer)
    result = {
        "id": item_id,
        "url": url,
        "task": task_description,
        "expected_answer": answer,
        "thinking": reasoning_content,
        "action_nl": action_nl,
        "grounded_action": grounded_action,
        "is_correct": is_correct
    }
    log.info(f"ID: {item_id}")
    log.info(f"任务: {task_description}")
    log.info(f"动作: {grounded_action}")
    log.info(f"是否正确: {is_correct}")
    log.info("-" * 50)
    return result
 def main():
    """Evaluate all exam items concurrently and report the accuracy.

    Reads ``temp_analysis/exam.json``, runs ``process_item`` for every entry
    on a thread pool, writes the collected results to
    ``temp_analysis/results.json`` and logs progress and the final accuracy.
    Items that yield no result (or whose worker raised) count as failures,
    and the accuracy is computed over all exam items.
    """
    # Open text files as UTF-8 explicitly; the platform default encoding is
    # not guaranteed to be UTF-8.
    with open("temp_analysis/exam.json", "r", encoding="utf-8") as f:
        exam_data = json.load(f)
    results = []
    total_items = len(exam_data)
    completed = 0
    success_count = 0
    fail_count = 0
    parallel = 4
    log.info(f"开始测试，总共 {total_items} 个任务")
    with concurrent.futures.ThreadPoolExecutor(max_workers=parallel) as executor:
        # Submit every item up front, then consume results as they finish.
        futures = [executor.submit(process_item, item) for item in exam_data]
        for future in concurrent.futures.as_completed(futures):
            try:
                result = future.result()
            except Exception as e:
                # future.result() re-raises whatever the worker raised; one
                # bad item must not abort the run before results are saved.
                log.error(f"任务处理出错: {e}")
                result = None
            completed += 1
            if result:
                results.append(result)
                if result["is_correct"]:
                    success_count += 1
                else:
                    fail_count += 1
            else:
                fail_count += 1
            # Log the running progress.
            progress = (completed / total_items) * 100
            log.info(f"进度: {progress:.2f}% ({completed}/{total_items}) - 成功: {success_count}, 失败: {fail_count}")
    # Persist the per-item results.
    with open("temp_analysis/results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    # Accuracy over all exam items, including failed ones.
    accuracy = success_count / total_items if total_items > 0 else 0
    log.info(f"测试完成! 总计: {total_items}题，正确: {success_count}题，错误: {fail_count}题，正确率: {accuracy:.2%}")
 # Run the evaluation only when this file is executed directly as a script.
 if __name__ == "__main__":
    main()
--- a/misc/temp_analysis/test_run
+++ b/misc/temp_analysis/test_run
@ -0,0 +1,364 @@
 import json
 import os
 import re
 import logging
 import concurrent.futures
 import argparse
 from openai import OpenAI
 from dotenv import load_dotenv
 # Logging setup: a module logger that sends INFO-and-above records to both a
 # log file and the console, using one shared format.
 log = logging.getLogger(__name__)
 log.setLevel(logging.INFO)
 formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 file_handler = logging.FileHandler('temp_analysis/test_run.log')
 console_handler = logging.StreamHandler()
 # Same level and format for both handlers; the file handler is attached first.
 for _handler in (file_handler, console_handler):
    _handler.setLevel(logging.INFO)
    _handler.setFormatter(formatter)
    log.addHandler(_handler)
 # Load environment variables.
 load_dotenv()
 # OpenAI client; the base URL can be set when another compatible service is used.
 client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    base_url=os.getenv("OPENAI_API_BASE_URL"),
 )
 # Prompts adapted from the task proposer agent prompt in https://arxiv.org/pdf/2502.11357
 # System prompt: describes the action space, the generation rules and the
 # expected output format (a fenced object with "action_in_natural_language"
 # and "grounded_action"). It is sent as-is; its braces are literal text.
 cot_system_prompt = """
 What does this webpage show? Imagine you are a real user on this webpage. Given the webpage
 screenshot or ocr result and parsed HTML/accessibility tree and the task description, please provide 
 the first action towards completing that task.
 Do the following step by step:
 1. Given the webpage screenshot or ocr result and parsed HTML/accessibility tree, generate the first action
 towards completing that task (in natural language form).
 2. Given the webpage screenshot or ocr result, parsed HTML/accessibility tree, and the natural language
 action, generate the grounded version of that action.
 ACTION SPACE: Your action space is: ['click [element ID]', 'type [element ID] [content]',
 'select [element ID] [content of option to select]', 'scroll [up]', 'scroll [down]', and 'stop'].
 Action output should follow the syntax as given below:
 click [element ID]: This action clicks on an element with a specific ID on the webpage.
 type [element ID] [content]: Use this to type the content into the field with id. By default, the
 "Enter" key is pressed after typing. Both the content and the ID should be within square braces
 as per the syntax.
 select [element ID] [content of option to select]: Select an option from a dropdown menu. The
 content of the option to select should be within square braces. When you get (select an option)
 tags from the accessibility tree, you need to select the serial number (element_id) corresponding
 to the select tag, not the option, and select the most likely content corresponding to the option as
 input.
 scroll [down]: Scroll the page down.
 scroll [up]: Scroll the page up.
 IMPORTANT: 
 To be successful, it is important to STRICTLY follow the below rules:
 Action generation rules:
 1. You should generate a single atomic action at each step.
 2. The action should be an atomic action from the given vocabulary - click, type, select, scroll
 (up or down), or stop.
 3. The arguments to each action should be within square braces. For example, "click [127]",
 "type [43] [content to type]", "scroll [up]", "scroll [down]".
 4. The natural language form of action (corresponding to the field "action_in_natural_language")
 should be consistent with the grounded version of the action (corresponding to the field "grounded
 _action"). Do NOT add any additional information in the grounded action. For example, if a
 particular element ID is specified in the grounded action, a description of that element must be
 present in the natural language action.
 5. If the type action is selected, the natural language form of action ("action_in_natural_language") should always specify the actual text to be typed.
 6. You should issue a "stop" action if the current webpage asks to log in or for credit card
 information.
 7. To input text, there is NO need to click the textbox first, directly type content. After typing,
 the system automatically hits the 'ENTER' key.
 8. STRICTLY Avoid repeating the same action (click/type) if the webpage remains unchanged.
 You may have selected the wrong web element.
 9. Do NOT use quotation marks in the action generation.
 OUTPUT FORMAT: 
 Please give a short analysis of the screenshot, parsed
 HTML/accessibility tree, then put your answer within ``` ```, for example,
 "In summary, the proposed task and the corresponding action is: ```{
 "action_in_natural_language":<ACTION_IN_NATURAL_LANGUAGE>:str,
 "grounded_action": <ACTION>:str}"```
 """
 # User prompt template: the {INIT_URL}, {A11Y_TREE}, {SCREENSHOT} and
 # {TASK_DESCRIPTION} fields are filled in with str.format when a request is built.
 cot_user_prompt = """
 Website URL: {INIT_URL}
 Parsed HTML/Accessibility Tree: {A11Y_TREE}
 Screenshot ocr result: {SCREENSHOT}
 Task description: {TASK_DESCRIPTION}
 """
 def call_api(messages):
    """Send `messages` to the chat-completions endpoint of the module-level client.

    Returns:
        The response object returned by client.chat.completions.create, or
        None when the call raised (the exception is logged, not re-raised).
    """
    request_kwargs = {
        "messages": messages,
        # alternative model: gpt-4o-mini
        "model": "aiproxy/deepseek-reasoner",
    }
    try:
        return client.chat.completions.create(**request_kwargs)
    except Exception as err:
        log.error(f"API调用出错: {err}")
    return None
 def extract_action(response_text):
    """Pull the action pair out of the fenced JSON object in a model reply.

    The reply is expected to contain an object wrapped in a ``` fence with the
    keys "action_in_natural_language" and "grounded_action". Besides the bare
    ```{...}``` form, the fence may carry a "json" language tag, may have
    whitespace between the object and the closing fence, and may have a stray
    double quote before the closing fence (the shape shown in the system
    prompt's own example).

    Args:
        response_text: Model reply text; None or an empty string is accepted.

    Returns:
        (action_in_natural_language, grounded_action). Both are None when no
        fenced object is found or it is not valid JSON; a key missing from the
        object yields None for that position.
    """
    if not response_text:
        # A reply without text content cannot contain an action.
        return None, None
    match = re.search(
        r'```(?:json)?\s*\{(.+?)\}\s*"?\s*```',
        response_text,
        re.DOTALL | re.IGNORECASE,
    )
    if match:
        try:
            # The capture excludes the outer braces; restore them before parsing.
            action_data = json.loads("{" + match.group(1) + "}")
        except json.JSONDecodeError:
            log.error(f"无法解析JSON: {match.group(1)}")
        else:
            return action_data.get("action_in_natural_language"), action_data.get("grounded_action")
    return None, None
 def check_answer(grounded_action, answer):
    """Tell whether a grounded click action targets one of the expected element IDs.

    Only actions that start with "click" and contain "click [<digits>]" can
    match; every other action (type, select, scroll, stop, missing) is False.

    Args:
        grounded_action: Action string produced by the model, or None.
        answer: Expected element ID(s). Usually a comma-separated string such
            as "12, 34"; a single number or a list/tuple/set of IDs is also
            accepted so that a non-string exam value does not raise.

    Returns:
        True when the clicked element ID is among the expected IDs.
    """
    if not isinstance(grounded_action, str) or not grounded_action.startswith("click"):
        return False
    # Extract the element ID.
    match = re.search(r'click \[(\d+)\]', grounded_action)
    if not match:
        return False
    element_id = match.group(1)
    if answer is None:
        return False
    if isinstance(answer, (list, tuple, set)):
        candidates = answer
    else:
        # Comma-separated IDs; str() also covers a bare numeric answer.
        candidates = str(answer).split(",")
    answer_ids = {str(candidate).strip() for candidate in candidates}
    return element_id in answer_ids
 def process_item(item):
    """Ask the model for the first action of one exam item and grade it.

    Args:
        item: Exam entry; the keys "id", "url", "question", "answer" and
            "answer_text" are read.

    Returns:
        A dict with id, url, task, expected_answer, thinking, action_nl,
        grounded_action and is_correct, or None when the accessibility-tree
        file axtrees/<id>.txt is missing or the API call produced no choices.
    """
    item_id = item["id"]
    url = item["url"]
    task_description = item["question"]
    answer = item["answer"]
    answer_text = item["answer_text"]
    log.info(f"处理ID: {item_id}, URL: {url}")
    # Read the accessibility tree. UTF-8 is requested explicitly so the read
    # does not depend on the platform's locale encoding.
    try:
        with open(f"axtrees/{item_id}.txt", "r", encoding="utf-8") as f:
            a11y_tree = f.read()
    except FileNotFoundError:
        log.error(f"找不到文件: axtrees/{item_id}.txt")
        return None
    # Build the chat request.
    messages = [
        {"role": "system", "content": cot_system_prompt},
        {"role": "user", "content": cot_user_prompt.format(
            INIT_URL=url,
            A11Y_TREE=a11y_tree,
            SCREENSHOT="",  # no screenshot OCR result is supplied here
            TASK_DESCRIPTION=task_description
        )}
    ]
    log.info(f"task_description: {task_description}")
    log.info(f"answer: {answer}, answer_text: {answer_text}")
    response = call_api(messages)
    if response and hasattr(response, 'choices') and len(response.choices) > 0:
        message = response.choices[0].message
        content = message.content
        # Not every response carries reasoning_content; a missing attribute
        # must not raise inside the worker thread.
        reasoning_content = getattr(message, 'reasoning_content', "")
        log.info(f"reasoning_content: {reasoning_content}")
        log.info(f"content: {content}")
        # Extract the action; a reply without text content is graded as no action.
        action_nl, grounded_action = extract_action(content or "")
        is_correct = check_answer(grounded_action, answer)
        result = {
            "id": item_id,
            "url": url,
            "task": task_description,
            "expected_answer": answer,
            "thinking": reasoning_content,
            "action_nl": action_nl,
            "grounded_action": grounded_action,
            "is_correct": is_correct
        }
        log.info(f"ID: {item_id}")
        log.info(f"任务: {task_description}")
        log.info(f"动作: {grounded_action}")
        log.info(f"是否正确: {is_correct}")
        log.info("-" * 50)
        return result
    else:
        log.error(f"API调用失败: {response}")
        return None
 def _run_items(items, max_workers, progress_label, on_result, **executor_kwargs):
    """Run process_item over `items` in a thread pool and tally the outcomes.

    Args:
        items: Exam items to process.
        max_workers: Size of the thread pool.
        progress_label: Prefix of the progress log line.
        on_result: Called with every non-empty result, in completion order.
        **executor_kwargs: Extra keyword arguments for ThreadPoolExecutor.

    Returns:
        (success_count, fail_count). An empty result and a raised exception
        both count as a failure.
    """
    total_items = len(items)
    completed = 0
    success_count = 0
    fail_count = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers, **executor_kwargs) as executor:
        future_to_item = {executor.submit(process_item, item): item for item in items}
        for future in concurrent.futures.as_completed(future_to_item):
            try:
                result = future.result()
            except Exception as e:
                # Results are saved only after the pool drains; one crashing
                # task must not discard everything collected so far.
                log.error(f"任务执行异常: {e}")
                result = None
            completed += 1
            if result:
                on_result(result)
                if result["is_correct"]:
                    success_count += 1
                else:
                    fail_count += 1
            else:
                fail_count += 1
            progress = (completed / total_items) * 100
            log.info(f"{progress_label}: {progress:.2f}% ({completed}/{total_items}) - 成功: {success_count}, 失败: {fail_count}")
    return success_count, fail_count
 def main():
    """Run the full exam, or with --rerun_failure re-run the items that have not passed.

    Full run: every item of temp_analysis/exam.json is processed with one
    worker and the non-empty results are written to temp_analysis/results.json.

    Re-run: items recorded in results.json with a false "is_correct" are
    processed again, together with exam items that have no record at all
    (their earlier run produced no result, so nothing was saved for them).
    New results replace the old entry with the same id, or are appended.
    """
    parser = argparse.ArgumentParser(description='运行测试或重跑失败的测试')
    parser.add_argument('--rerun_failure', action='store_true', help='重跑失败的测试用例')
    args = parser.parse_args()
    if args.rerun_failure:
        # Only a missing results.json means "run the full test first"; other
        # missing files are left to raise with their own message.
        try:
            with open("temp_analysis/results.json", "r") as f:
                results = json.load(f)
        except FileNotFoundError:
            log.error("找不到results.json文件，请先运行完整测试")
            return
        # exam.json supplies the complete item data.
        with open("temp_analysis/exam.json", "r") as f:
            exam_data = json.load(f)
        recorded_ids = {r["id"] for r in results}
        failed_ids = {r["id"] for r in results if not r.get("is_correct", False)}
        rerun_items = [
            item for item in exam_data
            if item["id"] in failed_ids or item["id"] not in recorded_ids
        ]
        if not rerun_items:
            log.info("没有找到失败的测试用例，无需重跑")
            return
        total_items = len(rerun_items)
        log.info(f"开始重跑失败的测试，总共 {total_items} 个任务")
        # Map each id to its position in `results` so reruns overwrite in place.
        result_indices = {r["id"]: i for i, r in enumerate(results)}

        def store(result):
            index = result_indices.get(result["id"])
            if index is None:
                result_indices[result["id"]] = len(results)
                results.append(result)
            else:
                results[index] = result

        success_count, fail_count = _run_items(rerun_items, 4, "重跑进度", store)
        # Save the merged results.
        with open("temp_analysis/results.json", "w") as f:
            json.dump(results, f, indent=2)
        # Accuracy over everything recorded so far.
        all_correct = sum(1 for r in results if r.get("is_correct", False))
        all_total = len(results)
        accuracy = all_correct / all_total if all_total > 0 else 0
        log.info(f"重跑完成! 重跑: {total_items}题，成功: {success_count}题，失败: {fail_count}题")
        log.info(f"总计: {all_total}题，正确: {all_correct}题，正确率: {accuracy:.2%}")
    else:
        # Full run over every exam item.
        with open("temp_analysis/exam.json", "r") as f:
            exam_data = json.load(f)
        results = []
        total_items = len(exam_data)
        log.info(f"开始测试，总共 {total_items} 个任务")
        success_count, fail_count = _run_items(
            exam_data, 1, "进度", results.append, thread_name_prefix="test_run"
        )
        with open("temp_analysis/results.json", "w") as f:
            json.dump(results, f, indent=2)
        accuracy = success_count / total_items if total_items > 0 else 0
        log.info(f"测试完成! 总计: {total_items}题，正确: {success_count}题，错误: {fail_count}题，正确率: {accuracy:.2%}")
 if __name__ == "__main__":
    main()
--- a/misc/temp_analysis/test_run
+++ b/misc/temp_analysis/test_run
@ -0,0 +1,84 @@
 # Prompts adapted from the task proposer agent prompt in https://arxiv.org/pdf/2502.11357
 # System prompt: describes the action space, the generation rules and the
 # expected output format (a fenced object with "action_in_natural_language"
 # and "grounded_action").
 cot_system_prompt = """
 What does this webpage show? Imagine you are a real user on this webpage. Given the webpage
 screenshot or ocr result and parsed HTML/accessibility tree and the task description, please provide 
 the first action towards completing that task.
 Do the following step by step:
 1. Given the webpage screenshot or ocr result and parsed HTML/accessibility tree, generate the first action
 towards completing that task (in natural language form).
 2. Given the webpage screenshot or ocr result, parsed HTML/accessibility tree, and the natural language
 action, generate the grounded version of that action.
 ACTION SPACE: Your action space is: ['click [element ID]', 'type [element ID] [content]',
 'select [element ID] [content of option to select]', 'scroll [up]', 'scroll [down]', and 'stop'].
 Action output should follow the syntax as given below:
 click [element ID]: This action clicks on an element with a specific ID on the webpage.
 type [element ID] [content]: Use this to type the content into the field with id. By default, the
 "Enter" key is pressed after typing. Both the content and the ID should be within square braces
 as per the syntax.
 select [element ID] [content of option to select]: Select an option from a dropdown menu. The
 content of the option to select should be within square braces. When you get (select an option)
 tags from the accessibility tree, you need to select the serial number (element_id) corresponding
 to the select tag, not the option, and select the most likely content corresponding to the option as
 input.
 scroll [down]: Scroll the page down.
 scroll [up]: Scroll the page up.
 IMPORTANT: 
 To be successful, it is important to STRICTLY follow the below rules:
 Action generation rules:
 1. You should generate a single atomic action at each step.
 2. The action should be an atomic action from the given vocabulary - click, type, select, scroll
 (up or down), or stop.
 3. The arguments to each action should be within square braces. For example, "click [127]",
 "type [43] [content to type]", "scroll [up]", "scroll [down]".
 4. The natural language form of action (corresponding to the field "action_in_natural_language")
 should be consistent with the grounded version of the action (corresponding to the field "grounded
 _action"). Do NOT add any additional information in the grounded action. For example, if a
 particular element ID is specified in the grounded action, a description of that element must be
 present in the natural language action.
 5. If the type action is selected, the natural language form of action ("action_in_natural_language") should always specify the actual text to be typed.
 6. You should issue a “stop” action if the current webpage asks to log in or for credit card
 information.
 7. To input text, there is NO need to click the textbox first, directly type content. After typing,
 the system automatically hits the ‘ENTER’ key.
 8. STRICTLY Avoid repeating the same action (click/type) if the webpage remains unchanged.
 You may have selected the wrong web element.
 9. Do NOT use quotation marks in the action generation.
 OUTPUT FORMAT: 
 Please give a short analysis of the screenshot, parsed
 HTML/accessibility tree, then put your answer within ``` ```, for example,
 "In summary, the proposed task and the corresponding action is: ```{
 "action_in_natural_language":<ACTION_IN_NATURAL_LANGUAGE>:str,
 "grounded_action": <ACTION>:str}"```
 """
 # User prompt template with str.format-style fields {INIT_URL}, {A11Y_TREE},
 # {SCREENSHOT} and {TASK_DESCRIPTION}.
 cot_user_prompt = """
 Website URL: {INIT_URL}
 Parsed HTML/Accessibility Tree: {A11Y_TREE}
 Screenshot ocr result: {SCREENSHOT}
 Task description: {TASK_DESCRIPTION}
 """
 # Print every chain text stored for the shortest paths of each URL in
 # path/processed_3_with_analysis_20250324163759.json.
 # json is imported here because this script calls json.load but has no
 # import for it anywhere above.
 import json
 with open('path/processed_3_with_analysis_20250324163759.json', 'r') as f:
    data = json.load(f)
 # Each top-level key is a URL; its value holds a 'shortestPathsMeta' list.
 for key, value in data.items():
    url = key
    shortestPathsMeta = value['shortestPathsMeta']
    for shortestPathMeta in shortestPathsMeta:
        chainTexts = shortestPathMeta['chainTexts']
        for chainText in chainTexts:
            print(chainText)
--- a/misc/temp_analysis/test_run.log
+++ b/misc/temp_analysis/test_run.log
--- a/misc/temp_analysis/test_run_retry.py
+++ b/misc/temp_analysis/test_run_retry.py
@ -0,0 +1,415 @@
 import json
 import os
 import re
 import logging
 import concurrent.futures
 import argparse
 import threading
 import datetime
 from openai import OpenAI
 from dotenv import load_dotenv
 # Logging setup: a module logger that sends INFO-and-above records to both a
 # log file and the console, using one shared format.
 log = logging.getLogger(__name__)
 log.setLevel(logging.INFO)
 formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 file_handler = logging.FileHandler('temp_analysis/test_run.log')
 console_handler = logging.StreamHandler()
 # Same level and format for both handlers; the file handler is attached first.
 for _handler in (file_handler, console_handler):
    _handler.setLevel(logging.INFO)
    _handler.setFormatter(formatter)
    log.addHandler(_handler)
 # Load environment variables.
 load_dotenv()
 # OpenAI client; the base URL can be set when another compatible service is used.
 client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    base_url=os.getenv("OPENAI_API_BASE_URL"),
 )
 # Prompts adapted from the task proposer agent prompt in https://arxiv.org/pdf/2502.11357
 # System prompt: describes the action space, the generation rules and the
 # expected output format (a fenced object with "action_in_natural_language"
 # and "grounded_action"). It is sent as-is; its braces are literal text.
 cot_system_prompt = """
 What does this webpage show? Imagine you are a real user on this webpage. Given the webpage
 screenshot or ocr result and parsed HTML/accessibility tree and the task description, please provide 
 the first action towards completing that task.
 Do the following step by step:
 1. Given the webpage screenshot or ocr result and parsed HTML/accessibility tree, generate the first action
 towards completing that task (in natural language form).
 2. Given the webpage screenshot or ocr result, parsed HTML/accessibility tree, and the natural language
 action, generate the grounded version of that action.
 ACTION SPACE: Your action space is: ['click [element ID]', 'type [element ID] [content]',
 'select [element ID] [content of option to select]', 'scroll [up]', 'scroll [down]', and 'stop'].
 Action output should follow the syntax as given below:
 click [element ID]: This action clicks on an element with a specific ID on the webpage.
 type [element ID] [content]: Use this to type the content into the field with id. By default, the
 "Enter" key is pressed after typing. Both the content and the ID should be within square braces
 as per the syntax.
 select [element ID] [content of option to select]: Select an option from a dropdown menu. The
 content of the option to select should be within square braces. When you get (select an option)
 tags from the accessibility tree, you need to select the serial number (element_id) corresponding
 to the select tag, not the option, and select the most likely content corresponding to the option as
 input.
 scroll [down]: Scroll the page down.
 scroll [up]: Scroll the page up.
 IMPORTANT: 
 To be successful, it is important to STRICTLY follow the below rules:
 Action generation rules:
 1. You should generate a single atomic action at each step.
 2. The action should be an atomic action from the given vocabulary - click, type, select, scroll
 (up or down), or stop.
 3. The arguments to each action should be within square braces. For example, "click [127]",
 "type [43] [content to type]", "scroll [up]", "scroll [down]".
 4. The natural language form of action (corresponding to the field "action_in_natural_language")
 should be consistent with the grounded version of the action (corresponding to the field "grounded
 _action"). Do NOT add any additional information in the grounded action. For example, if a
 particular element ID is specified in the grounded action, a description of that element must be
 present in the natural language action.
 5. If the type action is selected, the natural language form of action ("action_in_natural_language") should always specify the actual text to be typed.
 6. You should issue a "stop" action if the current webpage asks to log in or for credit card
 information.
 7. To input text, there is NO need to click the textbox first, directly type content. After typing,
 the system automatically hits the 'ENTER' key.
 8. STRICTLY Avoid repeating the same action (click/type) if the webpage remains unchanged.
 You may have selected the wrong web element.
 9. Do NOT use quotation marks in the action generation.
 OUTPUT FORMAT: 
 Please give a short analysis of the screenshot, parsed
 HTML/accessibility tree, then put your answer within ``` ```, for example,
 "In summary, the proposed task and the corresponding action is: ```{
 "action_in_natural_language":<ACTION_IN_NATURAL_LANGUAGE>:str,
 "grounded_action": <ACTION>:str}"```
 """
 # User prompt template: the {INIT_URL}, {A11Y_TREE}, {SCREENSHOT} and
 # {TASK_DESCRIPTION} fields are filled in with str.format when a request is built.
 cot_user_prompt = """
 Website URL: {INIT_URL}
 Parsed HTML/Accessibility Tree: {A11Y_TREE}
 Screenshot ocr result: {SCREENSHOT}
 Task description: {TASK_DESCRIPTION}
 """
 # One lock per results file, keyed by the file's path.
 file_locks = {
    path: threading.Lock()
    for path in (
        "temp_analysis/results.json",
        "temp_analysis/results_success.json",
        "temp_analysis/results_failure.json",
    )
 }
 def call_api(messages):
    """Send `messages` to the chat-completions endpoint of the module-level client.

    Returns:
        The response object returned by client.chat.completions.create, or
        None when the call raised (the exception is logged, not re-raised).
    """
    request_kwargs = {
        "messages": messages,
        # alternative model: gpt-4o-mini
        "model": "aiproxy/deepseek-reasoner",
    }
    try:
        return client.chat.completions.create(**request_kwargs)
    except Exception as err:
        log.error(f"API调用出错: {err}")
    return None
 def extract_action(response_text):
    """Pull the action pair out of the fenced JSON object in a model reply.

    The reply is expected to contain an object wrapped in a ``` fence with the
    keys "action_in_natural_language" and "grounded_action". Besides the bare
    ```{...}``` form, the fence may carry a "json" language tag, may have
    whitespace between the object and the closing fence, and may have a stray
    double quote before the closing fence (the shape shown in the system
    prompt's own example).

    Args:
        response_text: Model reply text; None or an empty string is accepted.

    Returns:
        (action_in_natural_language, grounded_action). Both are None when no
        fenced object is found or it is not valid JSON; a key missing from the
        object yields None for that position.
    """
    if not response_text:
        # A reply without text content cannot contain an action.
        return None, None
    match = re.search(
        r'```(?:json)?\s*\{(.+?)\}\s*"?\s*```',
        response_text,
        re.DOTALL | re.IGNORECASE,
    )
    if match:
        try:
            # The capture excludes the outer braces; restore them before parsing.
            action_data = json.loads("{" + match.group(1) + "}")
        except json.JSONDecodeError:
            log.error(f"无法解析JSON: {match.group(1)}")
        else:
            return action_data.get("action_in_natural_language"), action_data.get("grounded_action")
    return None, None
 def check_answer(grounded_action, answer):
    """Tell whether a grounded click action targets one of the expected element IDs.

    Only actions that start with "click" and contain "click [<digits>]" can
    match; every other action (type, select, scroll, stop, missing) is False.

    Args:
        grounded_action: Action string produced by the model, or None.
        answer: Expected element ID(s). Usually a comma-separated string such
            as "12, 34"; a single number or a list/tuple/set of IDs is also
            accepted so that a non-string exam value does not raise.

    Returns:
        True when the clicked element ID is among the expected IDs.
    """
    if not isinstance(grounded_action, str) or not grounded_action.startswith("click"):
        return False
    # Extract the element ID.
    match = re.search(r'click \[(\d+)\]', grounded_action)
    if not match:
        return False
    element_id = match.group(1)
    if answer is None:
        return False
    if isinstance(answer, (list, tuple, set)):
        candidates = answer
    else:
        # Comma-separated IDs; str() also covers a bare numeric answer.
        candidates = str(answer).split(",")
    answer_ids = {str(candidate).strip() for candidate in candidates}
    return element_id in answer_ids
 def load_successful_items():
    """Collect the IDs of the entries stored in temp_analysis/results_success.json.

    Returns:
        A set with the "id" of every entry that has one. The set is empty when
        the file does not exist, cannot be parsed as JSON (a warning is
        logged), or any other error occurs while loading (an error is logged).
    """
    success_path = "temp_analysis/results_success.json"
    successful_ids = set()
    try:
        if not os.path.exists(success_path):
            return successful_ids
        with open(success_path, "r") as f:
            try:
                success_data = json.load(f)
            except json.JSONDecodeError:
                log.warning("无法解析results_success.json，将视为空文件")
            else:
                successful_ids = {entry["id"] for entry in success_data if "id" in entry}
    except Exception as e:
        log.error(f"加载成功项目时出错: {e}")
    return successful_ids
 def process_item(item, successful_ids, max_retries=3):
    """Ask the model for the first action of one exam item, retrying until it is correct.

    Args:
        item: Exam entry; the keys "id", "url", "question", "answer" and
            "answer_text" are read.
        successful_ids: IDs that already succeeded; such items are skipped.
        max_retries: Maximum number of API attempts for this item.

    Returns:
        None when the item is skipped or its accessibility-tree file
        axtrees/<id>.txt is missing. Otherwise a dict describing the last
        attempt (thinking, action_nl, grounded_action, is_correct) together
        with the item data, the number of attempts, a timestamp and the list
        of all attempts.
    """
    item_id = item["id"]
    # Nothing to do for items that already succeeded.
    if item_id in successful_ids:
        log.info(f"ID: {item_id} 已经成功完成，跳过")
        return None
    url = item["url"]
    task_description = item["question"]
    answer = item["answer"]
    answer_text = item["answer_text"]
    log.info(f"处理ID: {item_id}, URL: {url}")
    # Read the accessibility tree. UTF-8 is requested explicitly so the read
    # does not depend on the platform's locale encoding.
    try:
        with open(f"axtrees/{item_id}.txt", "r", encoding="utf-8") as f:
            a11y_tree = f.read()
    except FileNotFoundError:
        log.error(f"找不到文件: axtrees/{item_id}.txt")
        return None
    # Build the chat request.
    messages = [
        {"role": "system", "content": cot_system_prompt},
        {"role": "user", "content": cot_user_prompt.format(
            INIT_URL=url,
            A11Y_TREE=a11y_tree,
            SCREENSHOT="",  # no screenshot OCR result is supplied here
            TASK_DESCRIPTION=task_description
        )}
    ]
    # The full prompt dump goes through the logger at debug level rather than
    # being printed unconditionally to stdout.
    log.debug(f"messages: #######\n {messages} \n######")
    log.info(f"task_description: {task_description}")
    log.info(f"answer: {answer}, answer_text: {answer_text}")
    all_attempts = []  # one record per API attempt, in order
    for attempt in range(max_retries):
        timestamp = datetime.datetime.now().isoformat()
        response = call_api(messages)
        if response and hasattr(response, 'choices') and len(response.choices) > 0:
            message = response.choices[0].message
            content = message.content
            reasoning_content = getattr(message, 'reasoning_content', "")
            log.info(f"reasoning_content: {reasoning_content}")
            log.info(f"content: {content}")
            # A reply without text content is graded as "no action" instead of
            # raising inside the worker.
            action_nl, grounded_action = extract_action(content or "")
            is_correct = check_answer(grounded_action, answer)
            all_attempts.append({
                "attempt": attempt + 1,
                "timestamp": timestamp,
                "action_nl": action_nl,
                "grounded_action": grounded_action,
                "is_correct": is_correct,
                "thinking": reasoning_content
            })
            if is_correct:
                log.info(f"ID: {item_id} 测试成功!")
                break
            log.info(f"ID: {item_id} 测试失败，尝试 {attempt+1}/{max_retries}")
        else:
            log.error(f"API调用失败: {response}")
            # Record the failed call as an attempt without an action.
            all_attempts.append({
                "attempt": attempt + 1,
                "timestamp": timestamp,
                "action_nl": None,
                "grounded_action": None,
                "is_correct": False,
                "thinking": "API调用失败"
            })
            if attempt < max_retries - 1:
                log.info(f"ID: {item_id} API调用失败，尝试 {attempt+1}/{max_retries}")
    # Every loop iteration records exactly one attempt.
    attempts_made = len(all_attempts)
    # The reported outcome is that of the last attempt, successful or not.
    last_attempt = all_attempts[-1] if all_attempts else None
    result = {
        "id": item_id,
        "url": url,
        "task": task_description,
        "expected_answer": answer,
        "answer_text": answer_text,
        "thinking": last_attempt.get("thinking", "") if last_attempt else "",
        "action_nl": last_attempt.get("action_nl", None) if last_attempt else None,
        "grounded_action": last_attempt.get("grounded_action", None) if last_attempt else None,
        "is_correct": last_attempt.get("is_correct", False) if last_attempt else False,
        "attempts": attempts_made,
        "timestamp": datetime.datetime.now().isoformat(),
        "all_attempts": all_attempts
    }
    log.info(f"ID: {item_id}")
    log.info(f"任务: {task_description}")
    log.info(f"动作: {result['grounded_action']}")
    log.info(f"是否正确: {result['is_correct']}")
    log.info(f"尝试次数: {attempts_made}")
    log.info("-" * 50)
    return result
 def append_to_result_file(result, filename):
    """Append `result` to the JSON list stored in `filename`.

    The read-modify-write cycle runs under the lock registered for `filename`
    in file_locks. A lock is registered on first use for paths not listed
    there, so every caller of the same path shares one lock. The new content
    is written to "<filename>.tmp" and then moved over the target, so an
    interrupted write cannot leave a truncated results file behind.

    Args:
        result: JSON-serialisable object to append.
        filename: Path of the JSON file holding a list.

    Returns:
        True on success; False when anything failed (the error is logged).
    """
    try:
        # setdefault stores the new lock, unlike get() with a throwaway default.
        lock = file_locks.setdefault(filename, threading.Lock())
        with lock:
            data = []
            if os.path.exists(filename):
                with open(filename, 'r') as f:
                    try:
                        data = json.load(f)
                    except json.JSONDecodeError:
                        # Empty or malformed file: start over with a new list.
                        data = []
            data.append(result)
            tmp_path = f"{filename}.tmp"
            with open(tmp_path, 'w') as f:
                json.dump(data, f, indent=2)
            os.replace(tmp_path, filename)
            return True
    except Exception as e:
        log.error(f"写入结果到文件 {filename} 时出错: {e}")
        return False
 def main():
    """Run the exam with retries, skipping items that already succeeded.

    Every item of temp_analysis/exam.json is handed to process_item (one
    worker). Each returned result is appended immediately to results.json and
    to results_success.json or results_failure.json. An item that returns no
    result counts as skipped only when its id is in the set of already
    successful ids; otherwise (missing input file, worker exception) it
    counts as a failure. Accuracy is computed over processed items only.
    """
    with open("temp_analysis/exam.json", "r") as f:
        exam_data = json.load(f)
    # IDs that succeeded in an earlier run.
    successful_ids = load_successful_items()
    log.info(f"已经成功完成的测试项目数: {len(successful_ids)}, 成功ID: {successful_ids}")
    results = []
    total_items = len(exam_data)
    completed = 0
    success_count = 0
    fail_count = 0
    skip_count = 0
    log.info(f"开始测试，总共 {total_items} 个任务")
    # Make sure these result files exist, initialised to an empty list.
    for filename in ["temp_analysis/results.json", "temp_analysis/results_failure.json"]:
        if not os.path.exists(filename):
            with open(filename, 'w') as f:
                json.dump([], f)
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        future_to_item = {executor.submit(process_item, item, successful_ids): item for item in exam_data}
        for future in concurrent.futures.as_completed(future_to_item):
            item = future_to_item[future]
            try:
                result = future.result()
            except Exception as e:
                # Keep going: one crashing item must not stop the whole run.
                log.error(f"任务执行异常: {e}")
                result = None
            completed += 1
            item_id = item.get("id") if isinstance(item, dict) else None
            if result is not None:
                results.append(result)
                # Persist right away so progress survives an interrupted run.
                append_to_result_file(result, "temp_analysis/results.json")
                if result["is_correct"]:
                    success_count += 1
                    append_to_result_file(result, "temp_analysis/results_success.json")
                else:
                    fail_count += 1
                    append_to_result_file(result, "temp_analysis/results_failure.json")
            elif item_id in successful_ids:
                skip_count += 1
                log.info("跳过一个已完成的测试项目")
            else:
                # No result and not previously successful: the item could not
                # be processed, which is a failure rather than a skip.
                fail_count += 1
            progress = (completed / total_items) * 100
            log.info(f"进度: {progress:.2f}% ({completed}/{total_items}) - 成功: {success_count}, 失败: {fail_count}, 跳过: {skip_count}")
    # Accuracy over the items that were actually processed.
    total_processed = success_count + fail_count
    accuracy = success_count / total_processed if total_processed > 0 else 0
    log.info(f"测试完成! 总计: {total_items}题，成功: {success_count}题，失败: {fail_count}题，跳过: {skip_count}题，正确率: {accuracy:.2%}")
    log.info("成功结果已保存到 results_success.json")
    log.info("失败结果已保存到 results_failure.json")
    log.info("全部结果已保存到 results.json")
 if __name__ == "__main__":
    main()
--- a/misc/temp_analysis/train.json
+++ b/misc/temp_analysis/train.json
--- a/misc/test.py
+++ b/misc/test.py
@ -0,0 +1,98 @@
 # Absolute, machine-specific path of the local UI-TARS-7B-DPO snapshot directory.
 model_path = "/data1/yuyr/models--bytedance-research--UI-TARS-7B-DPO/snapshots/727b0df39207dafc6cf211a61f29d84b7659c39c/"
 # Screenshot used as model input. The commented-out line is an alternative
 # crawler screenshot, already written as a file:// URL.
 # image_path = "file:///data1/yuyr/crawlee/screenshots/0fuABgATggRcGam_57.png"
 image_path = "/data1/yuyr/crawlee/aaa5.png"
 import base64
 def encode_image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    Any failure while opening, reading or encoding is printed to stdout and
    reported to the caller as ``None`` instead of raising.
    """
    encoded = None
    try:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
            encoded = base64.b64encode(raw_bytes).decode('utf-8')
    except Exception as err:
        print(f"图片编码失败: {err}")
        encoded = None
    return encoded
 # NOTE(review): image_base64 is computed here but is not referenced again in the visible script.
 image_base64 = encode_image_to_base64(image_path)
 from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 # Load the model from the local snapshot directory, placed on cuda:0.
 model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_path, torch_dtype="auto", device_map="cuda:0"
 )
 # Load the processor from the same local directory.
 processor = AutoProcessor.from_pretrained(model_path)
 # Conversation sent to the model: a system prompt that defines the GUI-agent
 # output format and action space, followed by one user turn that carries the
 # local PNG (as a file:// URL) and the instruction text.
 messages = [
    {
                "role": "system",
                "content": """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
 ## Output Format
 ```
 Thought: ...
 Action: ...
 ```
 ## Action Space
 click(start_box='<|box_start|>(x1,y1)<|box_end|>')
 left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
 right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
 drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
 hotkey(key='')
 type(content='') #If you want to submit your input, use \"\" at the end of `content`.
 scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
 wait() #Sleep for 5s and take a screenshot to check for any changes.
 finished()
 call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
 ## Note
 - Use Chinese in `Thought` part.
 - Summarize your next action (with its target element) in one sentence in `Thought` part."""
    },
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "image_url": f"file://{image_path}"
            },
            {"type": "text", "text": "点击购物"},
        ],
    }
 ]
 # Prepare the inference inputs: render the chat template to text (not yet
 # tokenized, with the generation prompt appended), extract the vision inputs
 # from the messages, then let the processor build padded PyTorch tensors.
 text = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
 )
 image_inputs, video_inputs = process_vision_info(messages)
 inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
 )
 # Move the inputs to the device the model lives on.
 inputs = inputs.to(model.device)
 # Inference: generate at most 1024 new tokens.
 generated_ids = model.generate(**inputs, max_new_tokens=1024)
 print(f"generated_ids: {generated_ids}")
 # Slice off the first len(in_ids) entries of each output sequence so that only
 # the part after the prompt is decoded.
 generated_ids_trimmed = [
    out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
 ]
 print(f"generated_ids_trimmed: {generated_ids_trimmed}")
 output_text = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
 )
 print(f"output_text: {output_text}")
--- a/misc/test_axtree.js
+++ b/misc/test_axtree.js
@ -0,0 +1,133 @@
 const { chromium } = require('playwright');
 (async () => {
  // Launch headless Chromium. Everything that uses the browser runs inside
  // try/finally so the browser process is shut down even when navigation or
  // the snapshot throws.
  const browser = await chromium.launch({ headless: true });
  try {
    const page = await browser.newPage();
    // Other pages tried during development:
    //   https://www.baidu.com
    //   https://play.grafana.org/a/grafana-synthetic-monitoring-app/probes
    await page.goto('https://play.grafana.org/d/be9htelw63ke8b/metrics-rename-example?orgId=1&from=now-6h&to=now&timezone=utc', { waitUntil: 'domcontentloaded' });
    // Fixed 10 s pause after DOMContentLoaded before the tree is captured.
    await page.waitForTimeout(10000);
    console.log('page loaded');
    // interestingOnly: false requests the complete tree instead of the pruned one.
    const axTree = await page.accessibility.snapshot({ interestingOnly: false });
    if (!axTree) {
      // snapshot() can resolve to null; there is nothing to traverse in that case.
      console.log('AXTree snapshot is empty');
      return;
    }
    // Running node number and the number -> pseudo-selector lookup table.
    let idCounter = 1;
    const idToSelector = {};
    const clickableRoles = ['button', 'link', 'menuitem', 'tab', 'checkbox', 'radio', 'switch', 'option'];
    /**
     * Print `node` and its descendants as an indented, numbered outline and
     * record a pseudo-selector for every printed node in `idToSelector`.
     *
     * InlineTextBox nodes, and unnamed none/generic nodes that are neither
     * focusable, focused nor expandable, are not printed; their children are
     * visited at the same depth with the skipped node passed as `parent`.
     *
     * @param {object} node - accessibility node from the snapshot
     * @param {number} depth - indentation level (4 spaces per level)
     * @param {object|null} parent - node passed down by the caller as parent
     */
    const traverse = (node, depth = 0, parent = null) => {
      const isUnlabelledWrapper =
        (node.role === 'none' || node.role === 'generic') &&
        !node.name &&
        !node.focusable &&
        !node.focused &&
        node.expanded === undefined;
      if (node.role === 'InlineTextBox' || isUnlabelledWrapper) {
        for (const child of node.children || []) {
          traverse(child, depth, node);
        }
        return;
      }
      const currentId = idCounter++;
      // Build the pseudo-selector: role, optional name, then state attributes.
      // NOTE: names are interpolated unescaped, so a name containing a double
      // quote yields a malformed selector; the table is illustrative only.
      const selectorParts = [`role=${node.role}`];
      if (node.name) {
        selectorParts.push(`[name="${node.name}"]`);
      }
      if (node.selected) selectorParts.push('[selected=true]');
      if (node.checked !== undefined) selectorParts.push(`[checked=${node.checked}]`);
      if (node.pressed !== undefined) selectorParts.push(`[pressed=${node.pressed}]`);
      // Prefix with the parent's selector unless the parent is the root WebArea.
      if (parent && parent.role !== 'WebArea') {
        let parentSelector = `role=${parent.role}`;
        if (parent.name) {
          parentSelector += `[name="${parent.name}"]`;
        }
        selectorParts.unshift(`${parentSelector} >>`);
      }
      // Append the 1-based position of the node among its parent's children.
      if (parent && parent.children) {
        const siblingIndex = parent.children.indexOf(node);
        if (siblingIndex !== -1) {
          selectorParts.push(`:nth-match(${siblingIndex + 1})`);
        }
      }
      idToSelector[currentId] = selectorParts.join(' ');
      // Collect the state flags shown in parentheses after the node name.
      const props = [];
      if (node.focusable) props.push('focusable');
      if (node.focused) props.push('focused');
      if (node.expanded !== undefined) props.push(`expanded=${node.expanded}`);
      if (node.selected) props.push('selected');
      if (node.checked !== undefined) props.push(`checked=${node.checked}`);
      if (node.disabled) props.push('disabled');
      if (node.required) props.push('required');
      if (node.pressed !== undefined) props.push(`pressed=${node.pressed}`);
      // A node counts as clickable when it has an interactive role or is focusable.
      if (clickableRoles.includes(node.role) || node.focusable) props.push('clickable');
      const indent = ' '.repeat(depth * 4);
      console.log(`${indent}[${currentId}] ${node.role} '${node.name || ''}'${props.length > 0 ? ' (' + props.join(', ') + ')' : ''}`);
      for (const child of node.children || []) {
        traverse(child, depth + 1, node);
      }
    };
    // Print the overall structure, starting with a header line for the root.
    console.log('## AXTree:');
    const rootProps = [];
    if (axTree.focusable) rootProps.push('focusable=True');
    if (axTree.focused) rootProps.push('focused');
    console.log(`Root${axTree.role || 'WebArea'} '${axTree.name || ''}'${rootProps.length > 0 ? ', ' + rootProps.join(', ') : ''}`);
    for (const child of axTree.children || []) {
      traverse(child, 1, axTree);
    }
    // Print the number -> pseudo-selector table.
    console.log('\n编号与 Selector 映射：');
    console.log(idToSelector);
    // The selectors above are generated for illustration, e.g.
    //   await page.click(idToSelector[25]);
    // Real automation needs selectors that uniquely locate each element.
  } finally {
    await browser.close();
  }
 })();
--- a/misc/test_expand_grafana_v15.js
+++ b/misc/test_expand_grafana_v15.js
@ -0,0 +1,161 @@
 const { chromium } = require('playwright');
 const fs = require('fs');
 const path = require('path');
 (async () => {
    // Launch the browser. headless: true runs without a visible window;
    // switch it to false to watch the run.
    const browser = await chromium.launch({
        headless: true
    });
    try {
        // Context and page are created inside the try so that a failure here
        // is logged and the browser is still closed by the finally block.
        const context = await browser.newContext();
        const page = await context.newPage();
        // setViewportSize returns a promise; await it so navigation starts
        // with the 2560x1440 viewport already applied.
        await page.setViewportSize({ width: 2560, height: 1440 });
        // Alternative entry points used during experiments:
        //   https://play.grafana.org/a/grafana-app-observability-app
        //   https://play.grafana.org/a/grafana-synthetic-monitoring-app/probes
        const url = "https://play.grafana.org/dashboards";
        console.log(`正在访问 ${url}...`);
        // 120 s navigation timeout; only wait for the DOM, not every resource.
        await page.goto(url, {
            timeout: 120000,
            waitUntil: 'domcontentloaded'
        });
        console.log('页面加载完成');
        // Best effort: give the network up to 30 s to go idle, continue either way.
        try {
            await page.waitForLoadState('networkidle', { timeout: 30000 });
        } catch (e) {
            console.log('网络未完全空闲，但继续执行：', e.message);
        }
        console.log('\n开始展开导航项...');
        // aria-labels of buttons that were already clicked.
        // NOTE(review): buttons without an aria-label all share the key null,
        // so only the first such button is ever clicked — confirm this is intended.
        const clickedButtons = new Set();
        // Click every not-yet-clicked expand/open button currently on the page;
        // resolves to true when at least one new button was clicked.
        const expandButtons = async () => {
            console.log('开始寻找可展开按钮...');
            const buttons = await page.$$('button[aria-label*="Expand"], button[aria-label*="Open"], button[aria-expanded="false"]');
            console.log(`找到 ${buttons.length} 个折叠按钮`);
            let newButtonsFound = false;
            for (const button of buttons) {
                try {
                    const ariaLabel = await button.getAttribute('aria-label');
                    if (clickedButtons.has(ariaLabel)) {
                        continue;
                    }
                    console.log(`点击新按钮: ${ariaLabel}`);
                    await button.click();
                    clickedButtons.add(ariaLabel);
                    newButtonsFound = true;
                    await page.waitForTimeout(200);
                } catch (e) {
                    console.log(`点击失败: ${e.message}`);
                }
            }
            return newButtonsFound;
        };
        // Keep scanning until a pass clicks nothing new.
        let iteration = 1;
        while (true) {
            console.log(`\n第 ${iteration} 次查找...`);
            const foundNewButtons = await expandButtons();
            if (!foundNewButtons) {
                console.log('没有发现新的可展开按钮，结束查找');
                break;
            }
            console.log(`已点击按钮数量: ${clickedButtons.size}`);
            await page.waitForTimeout(500);
            iteration++;
        }
        // Recreate the screenshot directory from scratch.
        const screenshotDir = path.join(__dirname, 'temp_screenshot');
        fs.rmSync(screenshotDir, { recursive: true, force: true });
        fs.mkdirSync(screenshotDir, { recursive: true });
        console.log(`创建截图目录: ${screenshotDir}`);
        const anchorHandles = await page.$$('a');
        console.log(`找到 ${anchorHandles.length} 个链接`);
        for (let i = 0; i < anchorHandles.length; i++) {
            try {
                const anchorHandle = anchorHandles[i];
                // Read the link target and its visible text.
                const anchorData = await page.evaluate(el => ({
                    url: el.href,
                    text: el.innerText.trim()
                }), anchorHandle);
                // File name stem: index + link text, with characters that are
                // illegal in file names replaced and the length capped at 100.
                let baseName = `link_${i+1}_${anchorData.text}`.replace(/[\\/:*?"<>|]/g, '_');
                if (baseName.length > 100) baseName = baseName.substring(0, 100);
                // Always carries the .png extension so that page.screenshot can
                // infer the image type, including on the fallback path below.
                let filename = `${baseName}.png`;
                try {
                    await page.evaluate(element => {
                        element.scrollIntoView({behavior: 'smooth', block: 'center'});
                    }, anchorHandle);
                    // Give scrolling and rendering time to settle.
                    await page.waitForTimeout(500);
                    // boundingBox() resolves to null for elements that are not
                    // visible; only append coordinates when a box exists.
                    const rect = await anchorHandle.boundingBox();
                    if (rect) {
                        filename = `${baseName}_${rect.x}_${rect.y}_${rect.width}_${rect.height}.png`;
                    }
                    const screenshotPath = path.join(screenshotDir, filename);
                    await page.screenshot({ path: screenshotPath });
                    console.log(`处理链接 ${i+1}/${anchorHandles.length}: ${anchorData.text} - 已截图保存至 ${filename}`);
                } catch (scrollError) {
                    console.log(`处理链接 ${i+1}/${anchorHandles.length}: ${anchorData.text} - 滚动失败但尝试截图`);
                    // Still try to capture the page even though scrolling or
                    // the first screenshot attempt failed.
                    try {
                        const screenshotPath = path.join(screenshotDir, filename);
                        await page.screenshot({ path: screenshotPath });
                    } catch (e) {
                        console.error(`截图失败: ${e.message}`);
                    }
                }
            } catch (error) {
                console.error(`处理第 ${i+1} 个链接时出错:`, error.message);
            }
        }
        console.log(`链接总数: ${anchorHandles.length}, 截图已保存到 ${screenshotDir}`);
        // Keep the page open for 1000 s before shutting down.
        console.log('\n等待1000秒...');
        await page.waitForTimeout(1000 * 1000);
    } catch (error) {
        console.error('发生错误:', error);
    } finally {
        // Always release the browser.
        await browser.close();
    }
 })();
--- a/misc/testoai.py
+++ b/misc/testoai.py
@ -0,0 +1,24 @@
 import os
 from openai import OpenAI
 from dotenv import load_dotenv
 # Load variables from a .env file (python-dotenv) before they are read below.
 load_dotenv()
 # The base URL is configurable so an OpenAI-compatible service can be used.
 client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    base_url=os.getenv("OPENAI_API_BASE_URL"),
 )
 # Single-turn smoke test; "gpt-4o-mini" is the model that was used previously.
 chat_completion = client.chat.completions.create(
    model="qwen/qwq-32b:free",
    messages=[{"role": "user", "content": "Say this is a test"}],
 )
 # Show the text of the first returned choice.
 print(chat_completion.choices[0].message.content)
--- a/misc/trajectory_analysis_v17.py
+++ b/misc/trajectory_analysis_v17.py
@ -0,0 +1,239 @@
 import os
 import json
 from dotenv import load_dotenv
 import base64
 from openai import OpenAI
 from pathlib import Path
 import concurrent.futures
 from typing import Dict, Any
 from datetime import datetime
 # 加载环境变量
 load_dotenv()
 MODEL_NAME = "gpt-4o-mini"
 # MODEL_NAME = "UI-TARS-72B-DPO"
 def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Errors raised while opening or reading the file propagate to the caller.
    """
    with open(image_path, "rb") as stream:
        payload = stream.read()
        return base64.b64encode(payload).decode('utf-8')
 def analyze_images(image_paths, prompt):
    """Send ``prompt`` together with the given images to the chat model.

    :param image_paths: paths of PNG files; each one is attached as a base64 data URL
    :param prompt: instruction text, placed first in the user message
    :return: text of the first returned choice, or a string starting with
        "发生错误: " when the API call raised
    """
    # A new client is created on every call, configured from the environment.
    client = OpenAI(
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_API_BASE_URL"),
    )
    # One user message: the text part first, then one image part per path.
    parts = [{"type": "text", "text": prompt}]
    for path_item in image_paths:
        data_url = f"data:image/png;base64,{encode_image(path_item)}"
        parts.append({"type": "image_url", "image_url": {"url": data_url}})
    messages = [{"role": "user", "content": parts}]
    try:
        reply = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=4000,
            temperature=0.5,
        )
        return reply.choices[0].message.content
    except Exception as err:
        # API failures are returned to the caller as text rather than raised.
        return f"发生错误: {str(err)}"
 def process_path_meta(meta):
    """Derive the screenshot paths for one trajectory and collect its chain fields.

    Pairs ``meta["chainIDs"]`` with ``meta["chainChildNum"]``; each pair maps to
    ``screenshots/<id>_<childNum>.png``, except the pair at the last index of
    ``chainIDs``, which maps to ``screenshots/<id>_full.png``.

    :return: dict with keys ``image_paths``, ``urls``, ``text`` and ``boundingbox``
    """
    chain_ids = meta["chainIDs"]
    pairs = list(zip(chain_ids, meta["chainChildNum"]))
    final_index = len(chain_ids) - 1
    image_paths = [
        f"screenshots/{cid}_full.png" if position == final_index
        else f"screenshots/{cid}_{child}.png"
        for position, (cid, child) in enumerate(pairs)
    ]
    summary = {"image_paths": image_paths}
    summary["urls"] = meta["chainUrls"]
    summary["text"] = meta["chainTexts"]
    summary["boundingbox"] = meta["chainViewportBoundingBoxes"]
    return summary
 def process_single_path(url: str, meta: Dict[str, Any], path_index: int) -> Dict[str, Any]:
    """Ask the model to describe one trajectory and store the answer on ``meta``.

    The screenshots derived from ``meta`` are sent with a prompt that asks for a
    JSON object holding ``page_description``, ``action_description`` and
    ``task_summaries``. ``meta`` is modified in place: ``raw_result`` receives
    the raw reply and the three fields receive either the parsed values or an
    error/placeholder text.

    :param url: target URL of the trajectory (used for logging only)
    :param meta: path metadata as accepted by ``process_path_meta``; may be None
    :param path_index: index of the path under ``url`` (used for logging only)
    :return: the same ``meta`` object, or None when ``meta`` is None
    """
    # Nothing to analyze for empty entries.
    if meta is None:
        return None
    processed_data = process_path_meta(meta)
    # Prompt template; doubled braces are literal braces for str.format.
    prompt_template = r"""You are a GUI agent. 
 根据给定的{urls_length}个网页截图，总结网页截图完成了一个什么样的任务，
 从第一个到倒数第二个网页用户点击的按钮文字text list分别是{text}，最后一个网页是最终到达目的页面。
 page_description中描述了用户在每个网页中看到的内容。
 action_description中描述了用户在每个网页中点击的元素，这里元素的文字(用[]包裹)要和前面提供的text list是对应的，还要描述元素所处周围环境。
 task_summaries中提炼轨迹可能对应的用户完成任务，任务内容要无歧义可以验证的，并且要和page_description和action_description相匹配，task_summaries中要包含不少于三个任务。
 示例输出
 {{
    "page_description": [
        "这看起来是一个Granfana首页界面，左边的导航栏已经展开，并且导航栏不是首屏，有滑动痕迹",
        "这看起来是点击了左侧导航栏的Probes选项后显示的Probes列表页面，截图最上面显示当前路径Home > Testing & synthetics > Synthetics > Probes 。而且列表页中显示了多个探测器，每个探测器有名称、版本和有一个View的按钮。页面看起来不是首屏，有滑动痕迹",
        "这是最终到达探测器详情页，标题是Viewing public probe Tokyo(APAC)，页面中还显示了该探测的的Status，Reachability，Location information，Version, Last offline, Last modified, Region等信息。"
    ],
    "action_description": [
        "点击了[Probes]选项。周围环境：Probes选择在导航栏三级菜单，一级菜单是Testing & synthetics，二级菜单是Synthetics，三级菜单有hecks，Probes和Alerts三个选项，我点击了Probes选项。",
        "点击了[Tokyo(APAC)]文字标题。周围环境：Tokyo(APAC)探测器条目在Probes列表页面中，每个探测器有名称、版本和有一个View的按钮",
    ],
    "task_summaries":  [
        {{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的状态。", "answer": "Online" }},
        {{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Reachability。", "answer": "100.0%" }},
        {{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Region。", "answer": "APAC" }},
        {{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Version。", "answer": "v0.10.5-0-g9201a28" }},
        {{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Last offline。", "answer": "March 18, 2025 at 05:23 AM" }},
        {{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Last modified。", "answer": "March 04, 2025 at 07:17 AM" }},
        {{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Location information的Lattitude。", "answer": "35.6762" }},
        {{ "question": "查询Grafana合成监控下的Tokyo(APAC)探测器的Location information的Longitude。", "answer": "139.6503" }}
    ]
 }}
 """
    # Fill in the number of screenshots and the list of clicked texts.
    prompt = prompt_template.format(
        urls_length=len(processed_data["urls"]),
        text=processed_data["text"]
    )
    print(f"Processing path {path_index} for URL: {url}")
    result = analyze_images(processed_data["image_paths"], prompt)
    print(f" path {path_index} for url {url} result: {result}")
    try:
        meta["raw_result"] = result
        parsed_result = result.strip()
        # Keep only the span from the first "{" to the last "}". This already
        # discards any chatter before or after the JSON object, so no separate
        # prefix stripping is done: cutting at the word "assistant" would
        # truncate valid JSON whose text happens to contain that word.
        start = parsed_result.find('{')
        end = parsed_result.rfind('}')
        if start != -1 and end != -1:
            parsed_result = parsed_result[start:end+1]
        try:
            result_json = json.loads(parsed_result)
            meta["page_description"] = result_json.get("page_description", "未能获取页面描述")
            meta["action_description"] = result_json.get("action_description", "未能获取动作描述")
            meta["task_summaries"] = result_json.get("task_summaries", "未能获取任务摘要")
        except json.JSONDecodeError as e:
            # Invalid JSON: keep the extracted text so it can be inspected later.
            print(f"JSON parsing error for URL {url}: {str(e)}")
            meta["page_description"] = "解析错误：无效的JSON格式"
            meta["action_description"] = f"原始响应：{parsed_result}"
            meta["task_summaries"] = f"原始响应：{parsed_result}"
    except Exception as e:
        # Any other failure (e.g. the reply is not a string or not a JSON object).
        print(f"Error processing result for URL {url}: {str(e)}")
        meta["page_description"] = "处理错误"
        meta["action_description"] = f"错误信息：{str(e)}"
        meta["task_summaries"] = f"错误信息：{str(e)}"
    return meta
 def update_json_with_analysis(json_path: str, max_workers: int = 4):
    """Analyze every path listed in a JSON file and write the enriched copy.

    Each non-None entry of ``shortestPathsMeta`` under every URL is handed to
    ``process_single_path`` on a thread pool. Results replace the entries in the
    loaded data, which is written to ``<json_path without .json>_with_analysis_<timestamp>.json``
    after every 10 processed entries and once more at the end.

    :param json_path: input JSON file, a mapping from URL to per-URL data
    :param max_workers: size of the thread pool
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # Flatten the work into (url, meta, index) triples, skipping empty entries.
    pending = [
        (url, meta, idx)
        for url, url_data in data.items()
        for idx, meta in enumerate(url_data.get("shortestPathsMeta", []))
        if meta is not None
    ]
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_path = json_path.replace('.json', f'_with_analysis_{stamp}.json')
    def write_snapshot():
        # Dump the current state of the whole data set to the output file.
        with open(output_path, 'w', encoding='utf-8') as f_out:
            json.dump(data, f_out, ensure_ascii=False, indent=2)
    processed_count = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Remember which (url, index) every future belongs to.
        future_to_task = {}
        for url, meta, idx in pending:
            future_to_task[executor.submit(process_single_path, url, meta, idx)] = (url, idx)
        for future in concurrent.futures.as_completed(future_to_task):
            url, path_index = future_to_task[future]
            try:
                result = future.result()
                if result is not None:
                    data[url]["shortestPathsMeta"][path_index] = result
                processed_count += 1
                # Periodic checkpoint every 10 processed entries.
                if processed_count % 10 == 0:
                    write_snapshot()
                    print(f"已处理{processed_count}个条目，保存到{output_path}")
            except Exception as e:
                print(f"Error processing path {path_index} for URL {url}: {str(e)}")
    # Final save of everything.
    write_snapshot()
    print(f"全部处理完成，最终保存到{output_path}")
 def main():
    """Run the analysis on path/processed_3.json with two worker threads."""
    # max_workers controls the concurrency; adjust as needed.
    update_json_with_analysis("path/processed_3.json", max_workers=2)
 # Entry point: call main() only when this file is executed directly, not when it is imported.
 if __name__ == "__main__":
    main()
--- a/misc/trajectory_analysis_v18.py
+++ b/misc/trajectory_analysis_v18.py
@ -0,0 +1,246 @@
 import os
 import json
 from dotenv import load_dotenv
 import base64
 from openai import OpenAI
 from pathlib import Path
 import concurrent.futures
 from typing import Dict, Any
 from datetime import datetime
 # 加载环境变量
 load_dotenv()
 MODEL_NAME = "gpt-4o-mini"
 # MODEL_NAME = "gpt-4o"
 # MODEL_NAME = "UI-TARS-72B-DPO"
 def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Errors raised while opening or reading the file propagate to the caller.
    """
    with open(image_path, "rb") as stream:
        payload = stream.read()
        return base64.b64encode(payload).decode('utf-8')
 def analyze_images(image_paths, prompt):
    """Send ``prompt`` together with the given images to the chat model.

    :param image_paths: paths of PNG files; each one is attached as a base64 data URL
    :param prompt: instruction text, placed first in the user message
    :return: text of the first returned choice, or a string starting with
        "发生错误: " when the API call raised
    """
    # A new client is created on every call, configured from the environment.
    client = OpenAI(
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_API_BASE_URL"),
    )
    # One user message: the text part first, then one image part per path.
    parts = [{"type": "text", "text": prompt}]
    for path_item in image_paths:
        data_url = f"data:image/png;base64,{encode_image(path_item)}"
        parts.append({"type": "image_url", "image_url": {"url": data_url}})
    messages = [{"role": "user", "content": parts}]
    try:
        reply = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=4000,
            temperature=0.5,
        )
        return reply.choices[0].message.content
    except Exception as err:
        # API failures are returned to the caller as text rather than raised.
        return f"发生错误: {str(err)}"
 def process_path_meta(meta):
    """Derive the screenshot paths for one trajectory and collect its chain fields.

    Pairs ``meta["chainIDs"]`` with ``meta["chainChildNum"]``; each pair maps to
    ``screenshots/<id>_<childNum>.png``, except the pair at the last index of
    ``chainIDs``, which maps to ``screenshots/<id>_full.png``.

    :return: dict with keys ``image_paths``, ``urls``, ``text`` and ``boundingbox``
    """
    chain_ids = meta["chainIDs"]
    pairs = list(zip(chain_ids, meta["chainChildNum"]))
    final_index = len(chain_ids) - 1
    image_paths = [
        f"screenshots/{cid}_full.png" if position == final_index
        else f"screenshots/{cid}_{child}.png"
        for position, (cid, child) in enumerate(pairs)
    ]
    summary = {"image_paths": image_paths}
    summary["urls"] = meta["chainUrls"]
    summary["text"] = meta["chainTexts"]
    summary["boundingbox"] = meta["chainViewportBoundingBoxes"]
    return summary
 def process_single_path(url: str, meta: Dict[str, Any], path_index: int) -> Dict[str, Any]:
    """Ask the model for a one-line task description of a trajectory and store it on ``meta``.

    The screenshots derived from ``meta`` are sent with a prompt that lists the
    clicked texts and the website URL and asks for the answer between triple
    backtick fences. ``meta`` is modified in place: ``raw_result`` receives the
    raw reply and ``title`` receives the text between the first two fences, or
    an error text when that text cannot be extracted.

    :param url: target URL of the trajectory, inserted into the prompt
    :param meta: path metadata as accepted by ``process_path_meta``; may be None
    :param path_index: index of the path under ``url`` (used for logging only)
    :return: the same ``meta`` object, or None when ``meta`` is None
    """
    # Nothing to analyze for empty entries.
    if meta is None:
        return None
    processed_data = process_path_meta(meta)
    # Task-description prompt; {WEBSITE_URL} and {ACTION_LIST} are filled below.
    prompt_template = r"""
    Given a list of actions performed on the website {WEBSITE_URL} and the corresponding
 screenshots
 List of actions: {ACTION_LIST}
 Your task is to come up with a single task description that will be accomplished by performing
 these actions in the given sequence on the website.
 IMPORTANT:
 1. The task must contain some actions: “Buy, Book, Find, Check, Choose, show me, search,
 browse, get, compare, view, give me, add to cart, ...”, ideally involving transactions/finding
 information on a specific product or service.
 2. You should propose tasks that are clear and specific.
 3. The task description should provide all the necessary information to complete the task.
 4. The task description must indicate the domain of the website at the end of the task with
 the format: “... on task website”, for instance, “Purchase a laptop on Amazon”, “Book a hair
 appointment on Yelp”, etc.
 5. The task should be feasible to complete by a real user and should not require any additional
 information that is not specified in this input.
 6. The task description should specify constraints like given budget, product features, and other
 specifications that can narrow down the search to a particular item/product.
 7. Do NOT use any quotation marks (either single or double) in the task description.
 The output should be in the below format:
 OUTPUT FORMAT: Please first give some analysis of the actions and screenshots and then
 output the overall task description. put your answer within ``` ```, for example, “In summary,
 the answer is: ```<TASK_DESCRIPTION>:str```”.
    """
    prompt = prompt_template.format(
        ACTION_LIST=processed_data["text"],
        WEBSITE_URL=url
    )
    print(f"Processing path {path_index} for URL: {url}")
    result = analyze_images(processed_data["image_paths"], prompt)
    print(f" path {path_index} for url {url} result: {result}")
    try:
        meta["raw_result"] = result
        # The segment after the first ``` fence (and before the next one) is the
        # task description; a reply without fences raises IndexError here.
        meta["title"] = result.split("```")[1]
    except Exception as e:
        print(f"Error processing result for URL {url}: {str(e)}")
        meta["title"] = f"错误信息：{str(e)}"
    return meta
 def update_json_with_analysis(json_path: str, max_workers: int = 4):
    """Analyze every path listed in a JSON file and write the enriched copy.

    Each non-None entry of ``shortestPathsMeta`` under every URL is handed to
    ``process_single_path`` on a thread pool. Results replace the entries in the
    loaded data, which is written to ``<json_path without .json>_with_analysis_<timestamp>.json``
    after every 10 processed entries and once more at the end.

    :param json_path: input JSON file, a mapping from URL to per-URL data
    :param max_workers: size of the thread pool
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # Flatten the work into (url, meta, index) triples, skipping empty entries.
    pending = [
        (url, meta, idx)
        for url, url_data in data.items()
        for idx, meta in enumerate(url_data.get("shortestPathsMeta", []))
        if meta is not None
    ]
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_path = json_path.replace('.json', f'_with_analysis_{stamp}.json')
    def write_snapshot():
        # Dump the current state of the whole data set to the output file.
        with open(output_path, 'w', encoding='utf-8') as f_out:
            json.dump(data, f_out, ensure_ascii=False, indent=2)
    processed_count = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Remember which (url, index) every future belongs to.
        future_to_task = {}
        for url, meta, idx in pending:
            future_to_task[executor.submit(process_single_path, url, meta, idx)] = (url, idx)
        for future in concurrent.futures.as_completed(future_to_task):
            url, path_index = future_to_task[future]
            try:
                result = future.result()
                if result is not None:
                    data[url]["shortestPathsMeta"][path_index] = result
                processed_count += 1
                # Periodic checkpoint every 10 processed entries.
                if processed_count % 10 == 0:
                    write_snapshot()
                    print(f"已处理{processed_count}个条目，保存到{output_path}")
            except Exception as e:
                print(f"Error processing path {path_index} for URL {url}: {str(e)}")
    # Final save of everything.
    write_snapshot()
    print(f"全部处理完成，最终保存到{output_path}")
 def main():
    """Run the analysis on path/processed_3.json with two worker threads."""
    # max_workers controls the concurrency; adjust as needed.
    update_json_with_analysis("path/processed_3.json", max_workers=2)
 # Entry point: call main() only when this file is executed directly, not when it is imported.
 if __name__ == "__main__":
    main()
--- a/misc/trajectory_test_description_v17.py
+++ b/misc/trajectory_test_description_v17.py
@ -0,0 +1,513 @@
 import json
 import os
 import re
 import requests
 import logging
 import base64
 from openai import OpenAI
 from dotenv import load_dotenv
 from PIL import Image
 import random
 import matplotlib.pyplot as plt
 from collections import Counter
 from datetime import datetime
 import shutil
 import torch
 from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
 import io
 from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 # Output directory for the result JSON and the generated charts
 test_dir = 'test'
 # Create the output directory if it does not exist yet
 os.makedirs(test_dir, exist_ok=True)
 # Module-level model/tokenizer placeholders
 # NOTE(review): none of the functions below appear to read or assign these two — possibly leftovers, confirm
 tokenizer = None
 model = None
 # Module-level Qwen2-VL model and processor, populated by load_qwen_model()
 qwen_model = None
 processor = None
 # Helper functions
 def load_json_data(file_path):
    """
    Read the UTF-8 JSON document at ``file_path`` and return the parsed object.

    Any failure (missing file, invalid JSON, ...) is logged and reported as None.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.loads(handle.read())
    except Exception as err:
        logger.error(f"加载JSON文件失败: {err}")
        return None
    return parsed
 def encode_image_to_base64(image_path):
    """
    Return the contents of the file at ``image_path`` as a base64 text string.

    Any failure while reading or encoding is logged and reported as None.
    """
    try:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
        encoded = base64.b64encode(raw_bytes)
        return encoded.decode('utf-8')
    except Exception as err:
        logger.error(f"图片编码失败: {err}")
        return None
 def load_qwen_model(model_path=None, device_map="cuda:0"):
    """
    Load the Qwen2-VL model and its processor into the module-level globals.

    Args:
        model_path: Local checkpoint directory. When None, the UI-TARS-7B-DPO
            snapshot path that used to be hard-coded here is used.
        device_map: Device placement forwarded to ``from_pretrained``.

    Returns:
        True when both the model and the processor were loaded; False when
        either load raised (the error is logged and the globals are untouched).
    """
    global qwen_model, processor
    if model_path is None:
        # Default local snapshot path
        model_path = "/data1/yuyr/models--bytedance-research--UI-TARS-7B-DPO/snapshots/727b0df39207dafc6cf211a61f29d84b7659c39c/"
    try:
        logger.info("正在加载Qwen2VL模型和处理器...")
        start_time = datetime.now()
        # Load the model
        loaded_model = Qwen2VLForConditionalGeneration.from_pretrained(
            model_path, torch_dtype="auto", device_map=device_map
        )
        logger.info("Qwen2VL模型加载完成")
        # Load the processor
        loaded_processor = AutoProcessor.from_pretrained(model_path)
        logger.info("处理器加载完成")
        # Publish both objects together: if the processor load fails, no model
        # stays referenced by the global while a retry loads a second copy.
        qwen_model, processor = loaded_model, loaded_processor
        load_time = (datetime.now() - start_time).total_seconds()
        logger.info(f"Qwen2VL模型和处理器加载完成，耗时: {load_time:.2f}秒")
        return True
    except Exception as e:
        logger.error(f"Qwen2VL模型加载失败: {e}")
        return False
 def call_gpt4o_mini(image_path, title, messages=None, max_retries=3):
    """
    Ask the local Qwen2-VL model for the next GUI action on a screenshot.

    Despite its name, this function does not call the OpenAI API; it runs the
    module-level ``qwen_model`` / ``processor`` (loading them on demand).

    Args:
        image_path: Path of the screenshot for the current step.
        title: Task text sent together with the screenshot.
        messages: Conversation history returned by a previous call, or None to
            start a new conversation with the system prompt.
        max_retries: Number of additional attempts after a failed inference.

    Returns:
        ``(output_text, messages)`` on success, where ``messages`` is the updated
        history (a passed-in list is updated in place). ``(None, messages)`` with
        the history exactly as passed in when every attempt failed.
    """
    global qwen_model, processor
    system_prompt = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
 ## Output Format
 ```
 Thought: ...
 Action: ...
 ```
 ## Action Space
 click(start_box='<|box_start|>(x1,y1)<|box_end|>')
 left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
 right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
 drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
 hotkey(key='')
 type(content='') #If you want to submit your input, use \"\" at the end of `content`.
 scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
 wait() #Sleep for 5s and take a screenshot to check for any changes.
 finished()
 call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
 ## Note
 - Use Chinese in `Thought` part.
 - Summarize your next action (with its target element) in one sentence in `Thought` part."""
    # Always make at least one attempt, even for max_retries <= 0.
    for attempt in range(max(max_retries, 0) + 1):
        try:
            # Load the model lazily if it is not available yet
            if qwen_model is None or processor is None:
                logger.warning("模型或处理器未加载，尝试加载...")
                if not load_qwen_model():
                    raise ValueError("模型加载失败")
            from qwen_vl_utils import process_vision_info
            # Build a working copy of the history for this attempt. The caller's
            # list is only touched on success, so a failed attempt cannot leave a
            # duplicated user turn behind for the retry.
            if messages is None:
                history = [{"role": "system", "content": system_prompt}]
            else:
                # Earlier user turns keep their text but lose their screenshots
                history = [
                    {**past, "content": [part for part in past["content"] if part["type"] != "image"]}
                    if past["role"] == "user" else past
                    for past in messages
                ]
            # User message for the current step: screenshot plus task text
            history.append({
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "image_url": f"file://{image_path}"
                    },
                    {"type": "text", "text": title},
                ],
            })
            # Prepare the inference inputs
            text = processor.apply_chat_template(
                history, tokenize=False, add_generation_prompt=True
            )
            image_inputs, video_inputs = process_vision_info(history)
            inputs = processor(
                text=[text],
                images=image_inputs,
                videos=video_inputs,
                padding=True,
                return_tensors="pt",
            )
            inputs = inputs.to(qwen_model.device)
            # Generate and keep only the newly generated tokens
            logger.info("正在生成模型回复...")
            generated_ids = qwen_model.generate(**inputs, max_new_tokens=1024)
            generated_ids_trimmed = [
                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
            ]
            output_text = processor.batch_decode(
                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
            )[0]
            logger.info(f"模型输出: {output_text}")
            # Record the assistant reply and commit the history
            history.append({"role": "assistant", "content": output_text})
            if messages is None:
                return output_text, history
            messages[:] = history
            return output_text, messages
        except Exception as e:
            logger.error(f"模型推理失败: {e}")
            remaining = max_retries - attempt
            if remaining > 0:
                logger.warning(f"重试中，剩余次数: {remaining}")
    logger.error("重试次数已用完，推理失败")
    return None, messages
 def extract_coordinates(response_text, image_path):
    """
    Extract the first ``(x,y)`` pair from the model reply and scale it to the image.

    The pair is treated as a position on a 1000x1000 grid and mapped to the pixel
    size of the image at ``image_path``. The ``Action:`` part of the reply is
    searched first so that a tuple mentioned in the ``Thought:`` text is not
    mistaken for the target; whitespace inside the parentheses is accepted.

    Args:
        response_text: Raw text returned by the model.
        image_path: Screenshot whose size defines the target coordinate space.

    Returns:
        dict with ``raw_x``/``raw_y`` (grid values) and ``adjust_x``/``adjust_y``
        (pixels), or None when no pair is found or the mapping fails.
    """
    logger.info(f"API响应: {response_text}")
    try:
        # Matches (x,y) and tolerates spaces such as "(123, 456)"
        pattern = r'\(\s*(\d+)\s*,\s*(\d+)\s*\)'
        action_start = response_text.find("Action:")
        match = None
        if action_start != -1:
            match = re.search(pattern, response_text[action_start:])
        if match is None:
            # No Action section (or no pair inside it): fall back to the whole reply
            match = re.search(pattern, response_text)
        if match:
            # Coordinates on the 1000x1000 grid
            x_1000 = int(match.group(1))
            y_1000 = int(match.group(2))
            # Actual image size in pixels
            with Image.open(image_path) as img:
                actual_width, actual_height = img.size
            # Map the grid coordinates to pixels
            x = round(actual_width * x_1000 / 1000)
            y = round(actual_height * y_1000 / 1000)
            logger.info(f"坐标映射: 从 ({x_1000}, {y_1000}) 映射到 ({x}, {y})")
            return {
                "raw_x": x_1000,
                "raw_y": y_1000,
                "adjust_x": x,
                "adjust_y": y
            }
        return None
    except Exception as e:
        logger.error(f"坐标提取或映射失败: {e}")
        return None
 def is_point_in_box(point, box):
    """
    Tell whether ``point`` (keys ``adjust_x``/``adjust_y``) lies inside ``box``
    (keys ``x``/``y``/``width``/``height``); the box edges count as inside.
    """
    left = box["x"]
    px = point["adjust_x"]
    if not (left <= px <= left + box["width"]):
        return False
    top = box["y"]
    py = point["adjust_y"]
    return top <= py <= top + box["height"]
 def test_path(url, path_data, use_title=True):
    """
    Replay one recorded path against the model and score it step by step.

    Args:
        url: URL the path belongs to (only used in log output).
        path_data: Entry for that URL; its first ``shortestPathsMeta`` item is evaluated.
        use_title: Send the ``title`` text as the task when True, otherwise ``description``.

    Returns:
        dict with ``steps_data`` (one record per scored step), ``success_steps``
        (number of steps whose predicted point hit the target box) and
        ``is_success`` (True when that number equals the total step count).
    """
    logger.info(f"开始测试路径: {url}, 使用{'标题' if use_title else '描述'}")
    meta = path_data["shortestPathsMeta"][0]
    if use_title:
        title = meta["title"]
    else:
        title = meta["description"]
    chain_ids = meta["chainIDs"]
    child_nums = meta["chainChildNum"]
    viewport_boxes = meta["chainViewportBoundingBoxes"]
    total_steps = len(chain_ids) - 1
    hits = 0
    records = []
    logger.info(f"任务内容: {title}")
    logger.info(f"总步骤数: {total_steps}")
    # Conversation history, created by the first model call
    history = None
    for step_index in range(total_steps):
        node_id = chain_ids[step_index]
        child_num = None
        if step_index < len(child_nums):
            child_num = child_nums[step_index]
        # Screenshot file name depends on whether a child number is known
        shot_name = f"{node_id}.png" if child_num is None else f"{node_id}_{child_num}.png"
        image_path = f"screenshots/{shot_name}"
        logger.info(f"步骤 {step_index+1}/{total_steps}: 处理图片 {image_path}")
        # Stop when the screenshot is missing
        if not os.path.exists(image_path):
            logger.error(f"图片不存在: {image_path}")
            break
        # Ask the model for the next action, carrying the history along
        response, history = call_gpt4o_mini(image_path, title, history)
        if not response:
            logger.error("模型调用失败")
            break
        coordinates = extract_coordinates(response, image_path)
        if not coordinates:
            logger.error("无法从响应中提取坐标")
            break
        logger.info(f"模型返回坐标: {coordinates}")
        # Compare the predicted point with the recorded target box
        target_box = None
        if step_index < len(viewport_boxes):
            target_box = viewport_boxes[step_index]
        step_success = bool(target_box) and is_point_in_box(coordinates, target_box)
        if step_success:
            logger.info("坐标在边界框内，步骤成功!")
            hits += 1
        else:
            logger.warning(f"坐标不在边界框内，步骤失败。目标边界框: {target_box}")
        records.append({
            "model_output": response,
            "raw_x": coordinates["raw_x"],
            "raw_y": coordinates["raw_y"],
            "adjust_x": coordinates["adjust_x"],
            "adjust_y": coordinates["adjust_y"],
            "bounding_box": target_box,
            "is_success": step_success
        })
        # A failed step ends the replay
        if not step_success:
            break
    is_success = hits == total_steps
    logger.info(f"路径测试完成: 总步骤 {total_steps}, 成功步骤 {hits}, 路径是否完全成功: {is_success}")
    return {
        "steps_data": records,
        "success_steps": hits,
        "is_success": is_success
    }
 def main():
    """
    Evaluate every path in the analysis JSON with the local model.

    Each path is run twice (title text, then description text); the collected
    results are written to a JSON file under ``test_dir`` and summarised as charts.
    Random sampling of 10 paths is currently disabled, so all URLs are tested.
    """
    logger.info("开始加载Qwen2VL模型...")
    model_ready = load_qwen_model()
    if not model_ready:
        logger.error("Qwen2VL模型加载失败，程序退出")
        return
    # Load the path data
    data = load_json_data("path/processed_3_with_analysis.json")
    if not data:
        logger.error("数据加载失败，程序退出")
        return
    selected_urls = list(data.keys())
    started_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]
    result_data = {
        "timestamp": started_at,
        "num": len(selected_urls),
        "list": []
    }
    for url in selected_urls:
        path_data = data[url]
        meta = path_data["shortestPathsMeta"][0]
        title = meta["title"]
        description = meta["description"]
        total_steps = len(meta["chainIDs"]) - 1
        # Kept after the loop: the result file name carries the last path's length
        path_length = total_steps
        logger.info(f"测试路径: {url}")
        logger.info(f"标题: {title}")
        logger.info(f"描述: {description}")
        # First run with the title, then with the description
        title_result = test_path(url, path_data, use_title=True)
        desc_result = test_path(url, path_data, use_title=False)
        result_data["list"].append({
            "url": url,
            "title": title,
            "description": description,
            "total_steps": total_steps,
            "title_steps": title_result["steps_data"],
            "desc_steps": desc_result["steps_data"],
            "title_success_steps": title_result["success_steps"],
            "title_is_success": title_result["is_success"],
            "desc_success_step": desc_result["success_steps"],
            "desc_is_success": desc_result["is_success"]
        })
    # Save the results
    file_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    result_file = f"{test_dir}/sample_path_{path_length + 1}_{file_stamp}.json"
    with open(result_file, 'w', encoding='utf-8') as out_file:
        json.dump(result_data, out_file, indent=2, ensure_ascii=False)
    logger.info(f"结果已保存到 {result_file}")
    # Plot the distribution of successful step counts
    generate_statistics(result_data)
 def generate_statistics(result_data):
    """
    Plot how many URLs reached each number of successful steps.

    Writes three PNG files into ``test_dir``: a bar chart for the title runs,
    one for the description runs, and a grouped chart comparing both.
    """
    def _draw_distribution(counter, bar_color, heading):
        # Draw one labelled bar chart on a new figure; returns the sorted step values.
        plt.figure(figsize=(10, 6))
        steps = sorted(counter.keys())
        counts = [counter[step] for step in steps]
        plt.bar(steps, counts, color=bar_color, alpha=0.7)
        plt.title(heading)
        plt.xlabel('成功步数')
        plt.ylabel('URL数量')
        # Print the exact count above each bar
        for step, count in zip(steps, counts):
            plt.text(step, count, str(count), ha='center', va='bottom')
        return steps

    runs = result_data["list"]
    title_success_counts = Counter([item["title_success_steps"] for item in runs])
    desc_success_counts = Counter([item["desc_success_step"] for item in runs])

    # Chart 1: title runs
    title_steps = _draw_distribution(title_success_counts, 'blue', '标题测试成功步数分布')
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    title_chart_path = f'{test_dir}/title_success_steps_distribution_{timestamp}.png'
    plt.savefig(title_chart_path)
    plt.close()

    # Chart 2: description runs
    desc_steps = _draw_distribution(desc_success_counts, 'green', '描述测试成功步数分布')
    desc_chart_path = f'{test_dir}/desc_success_steps_distribution_{timestamp}.png'
    plt.savefig(desc_chart_path)
    plt.close()

    # Chart 3: both series side by side over the union of step values
    plt.figure(figsize=(12, 7))
    all_steps = sorted(set(title_steps) | set(desc_steps))
    title_all_counts = [title_success_counts.get(step, 0) for step in all_steps]
    desc_all_counts = [desc_success_counts.get(step, 0) for step in all_steps]
    slots = range(len(all_steps))
    width = 0.35
    left_positions = [slot - width/2 for slot in slots]
    right_positions = [slot + width/2 for slot in slots]
    plt.bar(left_positions, title_all_counts, width, label='brief', color='blue', alpha=0.7)
    plt.bar(right_positions, desc_all_counts, width, label='detail', color='green', alpha=0.7)
    plt.title('brief vs detail success steps distribution')
    plt.xlabel('success steps')
    plt.ylabel('URL count')
    plt.xticks(slots, all_steps)
    plt.legend()
    # Label only the non-empty bars
    for position, count in zip(left_positions, title_all_counts):
        if count > 0:
            plt.text(position, count, str(count), ha='center', va='bottom')
    for position, count in zip(right_positions, desc_all_counts):
        if count > 0:
            plt.text(position, count, str(count), ha='center', va='bottom')
    compare_chart_path = f'{test_dir}/title_vs_desc_distribution_{timestamp}.png'
    plt.savefig(compare_chart_path)
    plt.close()
    logger.info(f"统计图表已保存: {title_chart_path}, {desc_chart_path}, {compare_chart_path}")
 # Run main() only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()
--- a/misc/trajectory_test_description_v17_with_summarize.py
+++ b/misc/trajectory_test_description_v17_with_summarize.py
@ -0,0 +1,513 @@
 import json
 import os
 import re
 import requests
 import logging
 import base64
 from openai import OpenAI
 from dotenv import load_dotenv
 from PIL import Image
 import random
 import matplotlib.pyplot as plt
 from collections import Counter
 from datetime import datetime
 import shutil
 import torch
 from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
 import io
 from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 # Output directory for the result JSON and the generated charts
 test_dir = 'test'
 # Create the output directory if it does not exist yet
 os.makedirs(test_dir, exist_ok=True)
 # Module-level model/tokenizer placeholders
 # NOTE(review): none of the functions below appear to read or assign these two — possibly leftovers, confirm
 tokenizer = None
 model = None
 # Module-level Qwen2-VL model and processor, populated by load_qwen_model()
 qwen_model = None
 processor = None
 # Helper functions
 def load_json_data(file_path):
    """
    Read the UTF-8 JSON document at ``file_path`` and return the parsed object.

    Any failure (missing file, invalid JSON, ...) is logged and reported as None.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.loads(handle.read())
    except Exception as err:
        logger.error(f"加载JSON文件失败: {err}")
        return None
    return parsed
 def encode_image_to_base64(image_path):
    """
    Return the contents of the file at ``image_path`` as a base64 text string.

    Any failure while reading or encoding is logged and reported as None.
    """
    try:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
        encoded = base64.b64encode(raw_bytes)
        return encoded.decode('utf-8')
    except Exception as err:
        logger.error(f"图片编码失败: {err}")
        return None
 def load_qwen_model(model_path=None, device_map="cuda:1"):
    """
    Load the Qwen2-VL model and its processor into the module-level globals.

    Args:
        model_path: Local checkpoint directory. When None, the UI-TARS-7B-DPO
            snapshot path that used to be hard-coded here is used.
        device_map: Device placement forwarded to ``from_pretrained``.

    Returns:
        True when both the model and the processor were loaded; False when
        either load raised (the error is logged and the globals are untouched).
    """
    global qwen_model, processor
    if model_path is None:
        # Default local snapshot path
        model_path = "/data1/yuyr/models--bytedance-research--UI-TARS-7B-DPO/snapshots/727b0df39207dafc6cf211a61f29d84b7659c39c/"
    try:
        logger.info("正在加载Qwen2VL模型和处理器...")
        start_time = datetime.now()
        # Load the model
        loaded_model = Qwen2VLForConditionalGeneration.from_pretrained(
            model_path, torch_dtype="auto", device_map=device_map
        )
        logger.info("Qwen2VL模型加载完成")
        # Load the processor
        loaded_processor = AutoProcessor.from_pretrained(model_path)
        logger.info("处理器加载完成")
        # Publish both objects together: if the processor load fails, no model
        # stays referenced by the global while a retry loads a second copy.
        qwen_model, processor = loaded_model, loaded_processor
        load_time = (datetime.now() - start_time).total_seconds()
        logger.info(f"Qwen2VL模型和处理器加载完成，耗时: {load_time:.2f}秒")
        return True
    except Exception as e:
        logger.error(f"Qwen2VL模型加载失败: {e}")
        return False
 def call_gpt4o_mini(image_path, title, messages=None, max_retries=3):
    """
    Ask the local Qwen2-VL model for the next GUI action on a screenshot.

    Despite its name, this function does not call the OpenAI API; it runs the
    module-level ``qwen_model`` / ``processor`` (loading them on demand).

    Args:
        image_path: Path of the screenshot for the current step.
        title: Task text sent together with the screenshot.
        messages: Conversation history returned by a previous call, or None to
            start a new conversation with the system prompt.
        max_retries: Number of additional attempts after a failed inference.

    Returns:
        ``(output_text, messages)`` on success, where ``messages`` is the updated
        history (a passed-in list is updated in place). ``(None, messages)`` with
        the history exactly as passed in when every attempt failed.
    """
    global qwen_model, processor
    system_prompt = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
 ## Output Format
 ```
 Thought: ...
 Action: ...
 ```
 ## Action Space
 click(start_box='<|box_start|>(x1,y1)<|box_end|>')
 left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
 right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
 drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
 hotkey(key='')
 type(content='') #If you want to submit your input, use \"\" at the end of `content`.
 scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
 wait() #Sleep for 5s and take a screenshot to check for any changes.
 finished()
 call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
 ## Note
 - Use Chinese in `Thought` part.
 - Summarize your next action (with its target element) in one sentence in `Thought` part."""
    # Always make at least one attempt, even for max_retries <= 0.
    for attempt in range(max(max_retries, 0) + 1):
        try:
            # Load the model lazily if it is not available yet
            if qwen_model is None or processor is None:
                logger.warning("模型或处理器未加载，尝试加载...")
                if not load_qwen_model():
                    raise ValueError("模型加载失败")
            from qwen_vl_utils import process_vision_info
            # Build a working copy of the history for this attempt. The caller's
            # list is only touched on success, so a failed attempt cannot leave a
            # duplicated user turn behind for the retry.
            if messages is None:
                history = [{"role": "system", "content": system_prompt}]
            else:
                # Earlier user turns keep their text but lose their screenshots
                history = [
                    {**past, "content": [part for part in past["content"] if part["type"] != "image"]}
                    if past["role"] == "user" else past
                    for past in messages
                ]
            # User message for the current step: screenshot plus task text
            history.append({
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "image_url": f"file://{image_path}"
                    },
                    {"type": "text", "text": title},
                ],
            })
            # Prepare the inference inputs
            text = processor.apply_chat_template(
                history, tokenize=False, add_generation_prompt=True
            )
            image_inputs, video_inputs = process_vision_info(history)
            inputs = processor(
                text=[text],
                images=image_inputs,
                videos=video_inputs,
                padding=True,
                return_tensors="pt",
            )
            inputs = inputs.to(qwen_model.device)
            # Generate and keep only the newly generated tokens
            logger.info("正在生成模型回复...")
            generated_ids = qwen_model.generate(**inputs, max_new_tokens=1024)
            generated_ids_trimmed = [
                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
            ]
            output_text = processor.batch_decode(
                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
            )[0]
            logger.info(f"模型输出: {output_text}")
            # Record the assistant reply and commit the history
            history.append({"role": "assistant", "content": output_text})
            if messages is None:
                return output_text, history
            messages[:] = history
            return output_text, messages
        except Exception as e:
            logger.error(f"模型推理失败: {e}")
            remaining = max_retries - attempt
            if remaining > 0:
                logger.warning(f"重试中，剩余次数: {remaining}")
    logger.error("重试次数已用完，推理失败")
    return None, messages
 def extract_coordinates(response_text, image_path):
    """
    Extract the first ``(x,y)`` pair from the model reply and scale it to the image.

    The pair is treated as a position on a 1000x1000 grid and mapped to the pixel
    size of the image at ``image_path``. The ``Action:`` part of the reply is
    searched first so that a tuple mentioned in the ``Thought:`` text is not
    mistaken for the target; whitespace inside the parentheses is accepted.

    Args:
        response_text: Raw text returned by the model.
        image_path: Screenshot whose size defines the target coordinate space.

    Returns:
        dict with ``raw_x``/``raw_y`` (grid values) and ``adjust_x``/``adjust_y``
        (pixels), or None when no pair is found or the mapping fails.
    """
    logger.info(f"API响应: {response_text}")
    try:
        # Matches (x,y) and tolerates spaces such as "(123, 456)"
        pattern = r'\(\s*(\d+)\s*,\s*(\d+)\s*\)'
        action_start = response_text.find("Action:")
        match = None
        if action_start != -1:
            match = re.search(pattern, response_text[action_start:])
        if match is None:
            # No Action section (or no pair inside it): fall back to the whole reply
            match = re.search(pattern, response_text)
        if match:
            # Coordinates on the 1000x1000 grid
            x_1000 = int(match.group(1))
            y_1000 = int(match.group(2))
            # Actual image size in pixels
            with Image.open(image_path) as img:
                actual_width, actual_height = img.size
            # Map the grid coordinates to pixels
            x = round(actual_width * x_1000 / 1000)
            y = round(actual_height * y_1000 / 1000)
            logger.info(f"坐标映射: 从 ({x_1000}, {y_1000}) 映射到 ({x}, {y})")
            return {
                "raw_x": x_1000,
                "raw_y": y_1000,
                "adjust_x": x,
                "adjust_y": y
            }
        return None
    except Exception as e:
        logger.error(f"坐标提取或映射失败: {e}")
        return None
 def is_point_in_box(point, box):
    """
    Tell whether ``point`` (keys ``adjust_x``/``adjust_y``) lies inside ``box``
    (keys ``x``/``y``/``width``/``height``); the box edges count as inside.
    """
    left = box["x"]
    px = point["adjust_x"]
    if not (left <= px <= left + box["width"]):
        return False
    top = box["y"]
    py = point["adjust_y"]
    return top <= py <= top + box["height"]
 def test_path(url, path_data, use_title=True):
    """
    Replay one recorded path against the model and score it step by step.

    Args:
        url: URL the path belongs to (only used in log output).
        path_data: Entry for that URL; its first ``shortestPathsMeta`` item is evaluated.
        use_title: Send the first task summary's ``question`` as the task when
            True, otherwise the ``raw_result`` text.

    Returns:
        dict with ``steps_data`` (one record per scored step), ``success_steps``
        (number of steps whose predicted point hit the target box) and
        ``is_success`` (True when that number equals the total step count).
    """
    logger.info(f"开始测试路径: {url}, 使用{'标题' if use_title else '描述'}")
    meta = path_data["shortestPathsMeta"][0]
    if use_title:
        title = meta["task_summaries"][0]["question"]
    else:
        title = meta["raw_result"]
    chain_ids = meta["chainIDs"]
    child_nums = meta["chainChildNum"]
    viewport_boxes = meta["chainViewportBoundingBoxes"]
    total_steps = len(chain_ids) - 1
    hits = 0
    records = []
    logger.info(f"任务内容: {title}")
    logger.info(f"总步骤数: {total_steps}")
    # Conversation history, created by the first model call
    history = None
    for step_index in range(total_steps):
        node_id = chain_ids[step_index]
        child_num = None
        if step_index < len(child_nums):
            child_num = child_nums[step_index]
        # Screenshot file name depends on whether a child number is known
        shot_name = f"{node_id}.png" if child_num is None else f"{node_id}_{child_num}.png"
        image_path = f"screenshots/{shot_name}"
        logger.info(f"步骤 {step_index+1}/{total_steps}: 处理图片 {image_path}")
        # Stop when the screenshot is missing
        if not os.path.exists(image_path):
            logger.error(f"图片不存在: {image_path}")
            break
        # Ask the model for the next action, carrying the history along
        response, history = call_gpt4o_mini(image_path, title, history)
        if not response:
            logger.error("模型调用失败")
            break
        coordinates = extract_coordinates(response, image_path)
        if not coordinates:
            logger.error("无法从响应中提取坐标")
            break
        logger.info(f"模型返回坐标: {coordinates}")
        # Compare the predicted point with the recorded target box
        target_box = None
        if step_index < len(viewport_boxes):
            target_box = viewport_boxes[step_index]
        step_success = bool(target_box) and is_point_in_box(coordinates, target_box)
        if step_success:
            logger.info("坐标在边界框内，步骤成功!")
            hits += 1
        else:
            logger.warning(f"坐标不在边界框内，步骤失败。目标边界框: {target_box}")
        records.append({
            "model_output": response,
            "raw_x": coordinates["raw_x"],
            "raw_y": coordinates["raw_y"],
            "adjust_x": coordinates["adjust_x"],
            "adjust_y": coordinates["adjust_y"],
            "bounding_box": target_box,
            "is_success": step_success
        })
        # A failed step ends the replay
        if not step_success:
            break
    is_success = hits == total_steps
    logger.info(f"路径测试完成: 总步骤 {total_steps}, 成功步骤 {hits}, 路径是否完全成功: {is_success}")
    return {
        "steps_data": records,
        "success_steps": hits,
        "is_success": is_success
    }
 def main():
    """
    Evaluate every path in the analysis JSON with the local model.

    Each path is run twice (summary question, then raw result text); the collected
    results are written to a JSON file under ``test_dir`` and summarised as charts.
    Random sampling of 10 paths is currently disabled, so all URLs are tested.
    """
    logger.info("开始加载Qwen2VL模型...")
    model_ready = load_qwen_model()
    if not model_ready:
        logger.error("Qwen2VL模型加载失败，程序退出")
        return
    # Load the path data
    data = load_json_data("path/processed_3_with_analysis.json")
    if not data:
        logger.error("数据加载失败，程序退出")
        return
    selected_urls = list(data.keys())
    started_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]
    result_data = {
        "timestamp": started_at,
        "num": len(selected_urls),
        "list": []
    }
    for url in selected_urls:
        path_data = data[url]
        meta = path_data["shortestPathsMeta"][0]
        title = meta["task_summaries"][0]["question"]
        description = meta["raw_result"]
        total_steps = len(meta["chainIDs"]) - 1
        # Kept after the loop: the result file name carries the last path's length
        path_length = total_steps
        logger.info(f"测试路径: {url}")
        logger.info(f"标题: {title}")
        logger.info(f"描述: {description}")
        # First run with the title text, then with the description text
        title_result = test_path(url, path_data, use_title=True)
        desc_result = test_path(url, path_data, use_title=False)
        result_data["list"].append({
            "url": url,
            "title": title,
            "description": description,
            "total_steps": total_steps,
            "title_steps": title_result["steps_data"],
            "desc_steps": desc_result["steps_data"],
            "title_success_steps": title_result["success_steps"],
            "title_is_success": title_result["is_success"],
            "desc_success_step": desc_result["success_steps"],
            "desc_is_success": desc_result["is_success"]
        })
    # Save the results
    file_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    result_file = f"{test_dir}/sample_path_{path_length + 1}_{file_stamp}.json"
    with open(result_file, 'w', encoding='utf-8') as out_file:
        json.dump(result_data, out_file, indent=2, ensure_ascii=False)
    logger.info(f"结果已保存到 {result_file}")
    # Plot the distribution of successful step counts
    generate_statistics(result_data)
 def generate_statistics(result_data):
    """
    Plot how many URLs reached each number of successful steps.

    Writes three PNG files into ``test_dir``: a bar chart for the title runs,
    one for the description runs, and a grouped chart comparing both.
    """
    def _draw_distribution(counter, bar_color, heading):
        # Draw one labelled bar chart on a new figure; returns the sorted step values.
        plt.figure(figsize=(10, 6))
        steps = sorted(counter.keys())
        counts = [counter[step] for step in steps]
        plt.bar(steps, counts, color=bar_color, alpha=0.7)
        plt.title(heading)
        plt.xlabel('成功步数')
        plt.ylabel('URL数量')
        # Print the exact count above each bar
        for step, count in zip(steps, counts):
            plt.text(step, count, str(count), ha='center', va='bottom')
        return steps

    runs = result_data["list"]
    title_success_counts = Counter([item["title_success_steps"] for item in runs])
    desc_success_counts = Counter([item["desc_success_step"] for item in runs])

    # Chart 1: title runs
    title_steps = _draw_distribution(title_success_counts, 'blue', '标题测试成功步数分布')
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    title_chart_path = f'{test_dir}/title_success_steps_distribution_{timestamp}.png'
    plt.savefig(title_chart_path)
    plt.close()

    # Chart 2: description runs
    desc_steps = _draw_distribution(desc_success_counts, 'green', '描述测试成功步数分布')
    desc_chart_path = f'{test_dir}/desc_success_steps_distribution_{timestamp}.png'
    plt.savefig(desc_chart_path)
    plt.close()

    # Chart 3: both series side by side over the union of step values
    plt.figure(figsize=(12, 7))
    all_steps = sorted(set(title_steps) | set(desc_steps))
    title_all_counts = [title_success_counts.get(step, 0) for step in all_steps]
    desc_all_counts = [desc_success_counts.get(step, 0) for step in all_steps]
    slots = range(len(all_steps))
    width = 0.35
    left_positions = [slot - width/2 for slot in slots]
    right_positions = [slot + width/2 for slot in slots]
    plt.bar(left_positions, title_all_counts, width, label='brief', color='blue', alpha=0.7)
    plt.bar(right_positions, desc_all_counts, width, label='detail', color='green', alpha=0.7)
    plt.title('brief vs detail success steps distribution')
    plt.xlabel('success steps')
    plt.ylabel('URL count')
    plt.xticks(slots, all_steps)
    plt.legend()
    # Label only the non-empty bars
    for position, count in zip(left_positions, title_all_counts):
        if count > 0:
            plt.text(position, count, str(count), ha='center', va='bottom')
    for position, count in zip(right_positions, desc_all_counts):
        if count > 0:
            plt.text(position, count, str(count), ha='center', va='bottom')
    compare_chart_path = f'{test_dir}/title_vs_desc_distribution_{timestamp}.png'
    plt.savefig(compare_chart_path)
    plt.close()
    logger.info(f"统计图表已保存: {title_chart_path}, {desc_chart_path}, {compare_chart_path}")
 # Run main() only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()
--- a/misc/trajectory_test_description_v18.py
+++ b/misc/trajectory_test_description_v18.py
@ -0,0 +1,513 @@
 import json
 import os
 import re
 import requests
 import logging
 import base64
 from openai import OpenAI
 from dotenv import load_dotenv
 from PIL import Image
 import random
 import matplotlib.pyplot as plt
 from collections import Counter
 from datetime import datetime
 import shutil
 import torch
 from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
 import io
 from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 # Configure logging: INFO level with timestamp, level name and message.
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 test_dir = 'test'
 # Create the output directory for results and charts (no error if it exists).
 os.makedirs(test_dir, exist_ok=True)
 # Module-level placeholders for a model/tokenizer pair.
 # NOTE(review): not referenced by the functions visible in this section — confirm before removing.
 tokenizer = None
 model = None
 # Module-level Qwen2-VL model and processor, populated by load_qwen_model().
 qwen_model = None
 processor = None
 # Helper functions: data loading first, then model loading.
 def load_json_data(file_path):
    """
    Read a UTF-8 encoded JSON file and return the parsed value.

    Returns None (after logging the error) when the file cannot be opened
    or its content cannot be parsed.
    """
    parsed = None
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except Exception as err:
        logger.error(f"加载JSON文件失败: {err}")
    return parsed
 def encode_image_to_base64(image_path):
    """
    Read a file in binary mode and return its content as a base64 text string.

    Returns None (after logging the error) when the file cannot be read.
    """
    encoded = None
    try:
        with open(image_path, "rb") as image_file:
            raw_bytes = image_file.read()
        encoded = base64.b64encode(raw_bytes).decode('utf-8')
    except Exception as err:
        logger.error(f"图片编码失败: {err}")
    return encoded
 def load_qwen_model(model_path=None):
    """
    Load the Qwen2-VL model and its processor into the module-level globals.

    Args:
        model_path: Optional directory of the local model snapshot. When
            omitted, the UI-TARS-7B-DPO snapshot path that used to be
            hard-coded here is used, so existing callers behave as before.

    Returns:
        True when both the model and the processor were loaded; False when
        any step raised (the error is logged). On failure the globals are
        left untouched, so they never hold a half-initialised pair.
    """
    global qwen_model, processor
    if model_path is None:
        # Default local snapshot directory (machine-specific).
        model_path = "/data1/yuyr/models--bytedance-research--UI-TARS-7B-DPO/snapshots/727b0df39207dafc6cf211a61f29d84b7659c39c/"
    try:
        logger.info("正在加载Qwen2VL模型和处理器...")
        start_time = datetime.now()
        # Load the model weights onto the first CUDA device.
        loaded_model = Qwen2VLForConditionalGeneration.from_pretrained(
            model_path, torch_dtype="auto", device_map="cuda:0"
        )
        logger.info("Qwen2VL模型加载完成")
        # Load the matching processor from the same snapshot.
        loaded_processor = AutoProcessor.from_pretrained(model_path)
        logger.info("处理器加载完成")
        # Publish both objects together only after both loads succeeded.
        qwen_model, processor = loaded_model, loaded_processor
        end_time = datetime.now()
        load_time = (end_time - start_time).total_seconds()
        logger.info(f"Qwen2VL模型和处理器加载完成，耗时: {load_time:.2f}秒")
        return True
    except Exception as e:
        logger.error(f"Qwen2VL模型加载失败: {e}")
        return False
 def call_gpt4o_mini(image_path, title, messages=None, max_retries=3):
    """
    Ask the local Qwen2-VL (UI-TARS) model for the next GUI action.

    Despite its name, this function runs the locally loaded HuggingFace model
    instead of calling the OpenAI API.

    Args:
        image_path: Path of the screenshot for the current step.
        title: Task text sent alongside the screenshot.
        messages: Conversation history from previous steps, or None to start
            a new conversation with the system prompt. Images in earlier user
            turns are removed so only the current screenshot is sent.
        max_retries: Number of additional attempts after a failed inference.

    Returns:
        (output_text, messages) on success, where messages includes the new
        user and assistant turns; (None, messages) once retries are exhausted.
    """
    global qwen_model, processor
    # Remembers the user turn appended by this attempt so that a failure can
    # roll it back; otherwise each retry would add a duplicate user turn.
    user_message = None
    try:
        # Lazily load the model if it has not been loaded yet.
        if qwen_model is None or processor is None:
            logger.warning("模型或处理器未加载，尝试加载...")
            if not load_qwen_model():
                raise ValueError("模型加载失败")
        from qwen_vl_utils import process_vision_info
        # New conversation: start with the system prompt.
        if messages is None:
            messages = [
                {
                    "role": "system",
                    "content": """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
 ## Output Format
 ```
 Thought: ...
 Action: ...
 ```
 ## Action Space
 click(start_box='<|box_start|>(x1,y1)<|box_end|>')
 left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
 right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
 drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
 hotkey(key='')
 type(content='') #If you want to submit your input, use \"\" at the end of `content`.
 scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
 wait() #Sleep for 5s and take a screenshot to check for any changes.
 finished()
 call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
 ## Note
 - Use Chinese in `Thought` part.
 - Summarize your next action (with its target element) in one sentence in `Thought` part."""
                }
            ]
        else:
            # Drop images from earlier user turns; only the current screenshot is sent.
            for message in messages:
                if message["role"] == "user":
                    message["content"] = [content for content in message["content"] if content["type"] != "image"]
        # Build the user turn for the current step.
        user_message = {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image_url": f"file://{image_path}"
                },
                {"type": "text", "text": title},
            ],
        }
        messages.append(user_message)
        # Prepare the model inputs.
        text = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        )
        inputs = inputs.to(qwen_model.device)
        # Generate, then strip the prompt tokens from each output sequence.
        logger.info("正在生成模型回复...")
        generated_ids = qwen_model.generate(**inputs, max_new_tokens=1024)
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        output_text = processor.batch_decode(
            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )[0]
        logger.info(f"模型输出: {output_text}")
        # Record the assistant reply in the conversation history.
        assistant_message = {
            "role": "assistant",
            "content": output_text
        }
        messages.append(assistant_message)
        return output_text, messages
    except Exception as e:
        logger.error(f"模型推理失败: {e}")
        # Roll back the user turn added by this failed attempt so the retry
        # (or the caller) does not see a dangling or duplicated user message.
        if user_message is not None and messages and messages[-1] is user_message:
            messages.pop()
        if max_retries > 0:
            logger.warning(f"重试中，剩余次数: {max_retries}")
            return call_gpt4o_mini(image_path, title, messages, max_retries - 1)
        else:
            logger.error("重试次数已用完，推理失败")
            return None, messages
 def extract_coordinates(response_text, image_path):
    """
    Extract the first "(x,y)" pair from the model response and map it from
    the 1000x1000 coordinate space to the pixel size of the image.

    Args:
        response_text: Raw text returned by the model.
        image_path: Screenshot whose pixel size is used for the mapping.

    Returns:
        A dict with "raw_x"/"raw_y" (values as written by the model) and
        "adjust_x"/"adjust_y" (mapped pixel coordinates), or None when no
        pair is found or an error occurs (the error is logged).
    """
    logger.info(f"API响应: {response_text}")
    try:
        # Match "(x,y)"; optional whitespace is tolerated so that outputs
        # such as "(235, 512)" are not rejected.
        pattern = r'\(\s*(\d+)\s*,\s*(\d+)\s*\)'
        match = re.search(pattern, response_text)
        if match:
            # Coordinates in the 1000x1000 space.
            x_1000 = int(match.group(1))
            y_1000 = int(match.group(2))
            # Actual image size in pixels.
            with Image.open(image_path) as img:
                actual_width, actual_height = img.size
            # Scale to the actual image size.
            x = round(actual_width * x_1000 / 1000)
            y = round(actual_height * y_1000 / 1000)
            logger.info(f"坐标映射: 从 ({x_1000}, {y_1000}) 映射到 ({x}, {y})")
            return {
                "raw_x": x_1000,
                "raw_y": y_1000,
                "adjust_x": x,
                "adjust_y": y
            }
        return None
    except Exception as e:
        logger.error(f"坐标提取或映射失败: {e}")
        return None
 def is_point_in_box(point, box):
    """
    Return True when the mapped point ("adjust_x", "adjust_y") lies inside
    the box given by "x", "y", "width" and "height" (edges included).
    """
    within_x = box["x"] <= point["adjust_x"] <= box["x"] + box["width"]
    if not within_x:
        return False
    return box["y"] <= point["adjust_y"] <= box["y"] + box["height"]
 def test_path(url, path_data, use_title=True):
    """
    Replay one recorded path step by step and check the model's clicks.

    For each step the screenshot is sent to the model together with the task
    text; the step succeeds when the returned point lies inside the recorded
    bounding box. The replay stops at the first failed step, missing
    screenshot, failed model call, or unparsable response.

    Args:
        url: URL of the path, used for logging only.
        path_data: Path record; the first entry of "shortestPathsMeta" is used.
        use_title: When True the meta "title" is the task text, otherwise
            the meta "raw_result" is used.

    Returns:
        A dict with "steps_data" (one entry per evaluated step),
        "success_steps" (count of successful steps) and "is_success"
        (True when every step succeeded).
    """
    logger.info(f"开始测试路径: {url}, 使用{'标题' if use_title else '描述'}")
    meta = path_data["shortestPathsMeta"][0]
    title = meta["title"] if use_title else meta["raw_result"]
    # NOTE(review): task_type is assigned but never read in this function.
    task_type = "title" if use_title else "raw_result"
    chain_ids = meta["chainIDs"]
    child_nums = meta["chainChildNum"]
    viewport_boxes = meta["chainViewportBoundingBoxes"]
    # A chain of N ids is evaluated as N - 1 steps.
    total_steps = len(chain_ids) - 1
    success_steps = 0
    steps_data = []
    logger.info(f"任务内容: {title}")
    logger.info(f"总步骤数: {total_steps}")
    # Conversation history; None makes the first call start a new conversation.
    messages = None
    # Walk through every step.
    for i in range(total_steps):
        current_id = chain_ids[i]
        child_num = child_nums[i] if i < len(child_nums) else None
        # Build the screenshot path, with the child number suffix when available.
        if child_num is not None:
            image_path = f"screenshots/{current_id}_{child_num}.png"
        else:
            image_path = f"screenshots/{current_id}.png"
        logger.info(f"步骤 {i+1}/{total_steps}: 处理图片 {image_path}")
        # Stop when the screenshot is missing.
        if not os.path.exists(image_path):
            logger.error(f"图片不存在: {image_path}")
            break
        # Ask the model for the click position, passing the history along.
        response, messages = call_gpt4o_mini(image_path, title, messages)
        if not response:
            logger.error("模型调用失败")
            break
        # Parse the coordinates from the response.
        coordinates = extract_coordinates(response, image_path)
        if not coordinates:
            logger.error("无法从响应中提取坐标")
            break
        logger.info(f"模型返回坐标: {coordinates}")
        # Check whether the point falls inside the recorded bounding box;
        # a missing box counts as a failed step.
        target_box = viewport_boxes[i] if i < len(viewport_boxes) else None
        step_success = False
        if target_box and is_point_in_box(coordinates, target_box):
            logger.info("坐标在边界框内，步骤成功!")
            success_steps += 1
            step_success = True
        else:
            logger.warning(f"坐标不在边界框内，步骤失败。目标边界框: {target_box}")
        # Record the step details.
        step_data = {
            "model_output": response,
            "raw_x": coordinates["raw_x"],
            "raw_y": coordinates["raw_y"],
            "adjust_x": coordinates["adjust_x"],
            "adjust_y": coordinates["adjust_y"],
            "bounding_box": target_box,
            "is_success": step_success
        }
        steps_data.append(step_data)
        # A failed step ends the replay; later steps are not attempted.
        if not step_success:
            break
    # Summarize the result: success requires every step to have succeeded.
    is_success = (success_steps == total_steps)
    logger.info(f"路径测试完成: 总步骤 {total_steps}, 成功步骤 {success_steps}, 路径是否完全成功: {is_success}")
    return {
        "steps_data": steps_data,
        "success_steps": success_steps,
        "is_success": is_success
    }
 def main():
    """
    Entry point: load the model, sample up to 10 paths from the path data,
    test each one with both the title and the description as task text,
    save the results as JSON under test_dir and generate the charts.
    """
    # Load the Qwen2-VL model; abort when loading fails.
    logger.info("开始加载Qwen2VL模型...")
    if not load_qwen_model():
        logger.error("Qwen2VL模型加载失败，程序退出")
        return
    # Load the path data; abort when it is missing or empty.
    path_json = "path/processed_3_with_analysis.json"
    data = load_json_data(path_json)
    if not data:
        logger.error("数据加载失败，程序退出")
        return
    # Randomly pick 10 URLs, or use all of them when there are 10 or fewer.
    urls = list(data.keys())
    if len(urls) > 10:
        selected_urls = random.sample(urls, 10)
    else:
        selected_urls = urls
    # selected_urls = urls  (disabled alternative: test every URL)
    # Prepare the result container; this timestamp has millisecond precision.
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]
    result_data = {
        "timestamp": timestamp,
        "num": len(selected_urls),
        "list": []
    }
    # Test every selected path.
    for url in selected_urls:
        path_data = data[url]
        meta = path_data["shortestPathsMeta"][0]
        title = meta["title"]
        description = meta["raw_result"]
        total_steps = len(meta["chainIDs"]) - 1
        # Remember the step count for the output file name.
        # NOTE(review): after the loop this holds the value of the LAST url only.
        path_length = total_steps
        logger.info(f"测试路径: {url}")
        logger.info(f"标题: {title}")
        logger.info(f"描述: {description}")
        # Run with the title as task text.
        title_result = test_path(url, path_data, use_title=True)
        # Run with the description as task text.
        desc_result = test_path(url, path_data, use_title=False)
        # Collect both results; the key "desc_success_step" (singular) is
        # what generate_statistics reads.
        path_result = {
            "url": url,
            "title": title,
            "description": description,
            "total_steps": total_steps,
            "title_steps": title_result["steps_data"],
            "desc_steps": desc_result["steps_data"],
            "title_success_steps": title_result["success_steps"],
            "title_is_success": title_result["is_success"],
            "desc_success_step": desc_result["success_steps"],
            "desc_is_success": desc_result["is_success"]
        }
        result_data["list"].append(path_result)
    # File-name-safe timestamp (replaces the display timestamp above).
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Save the results as JSON.
    result_file = f"{test_dir}/sample_path_{path_length + 1}_{timestamp}.json"
    with open(result_file, 'w', encoding='utf-8') as f:
        json.dump(result_data, f, indent=2, ensure_ascii=False)
    logger.info(f"结果已保存到 {result_file}")
    # Build the success-step distribution charts.
    generate_statistics(result_data)
 def generate_statistics(result_data):
    """
    Plot how many URLs reached each number of successful steps.

    Three PNG charts are written to test_dir: one for the title runs, one
    for the description runs, and a grouped comparison of both.

    Args:
        result_data: Result dict whose "list" entries carry
            "title_success_steps" and "desc_success_step".
    """
    # Distribution of successful steps for the title runs.
    title_success_steps = [item["title_success_steps"] for item in result_data["list"]]
    title_success_counts = Counter(title_success_steps)
    # Distribution of successful steps for the description runs.
    desc_success_steps = [item["desc_success_step"] for item in result_data["list"]]
    desc_success_counts = Counter(desc_success_steps)
    # Chart 1: title runs.
    plt.figure(figsize=(10, 6))
    title_steps = sorted(title_success_counts.keys())
    title_counts = [title_success_counts[step] for step in title_steps]
    plt.bar(title_steps, title_counts, color='blue', alpha=0.7)
    plt.title('标题测试成功步数分布')
    plt.xlabel('成功步数')
    plt.ylabel('URL数量')
    # Print the count above each bar.
    for i, count in enumerate(title_counts):
        plt.text(title_steps[i], count, str(count), ha='center', va='bottom')
    # Save the chart; the same timestamp is reused for all three files.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    title_chart_path = f'{test_dir}/title_success_steps_distribution_{timestamp}.png'
    plt.savefig(title_chart_path)
    plt.close()
    # Chart 2: description runs.
    plt.figure(figsize=(10, 6))
    desc_steps = sorted(desc_success_counts.keys())
    desc_counts = [desc_success_counts[step] for step in desc_steps]
    plt.bar(desc_steps, desc_counts, color='green', alpha=0.7)
    plt.title('描述测试成功步数分布')
    plt.xlabel('成功步数')
    plt.ylabel('URL数量')
    # Print the count above each bar.
    for i, count in enumerate(desc_counts):
        plt.text(desc_steps[i], count, str(count), ha='center', va='bottom')
    # Save the chart.
    desc_chart_path = f'{test_dir}/desc_success_steps_distribution_{timestamp}.png'
    plt.savefig(desc_chart_path)
    plt.close()
    # Chart 3: grouped comparison.
    plt.figure(figsize=(12, 7))
    # Union of the step counts seen in either run.
    all_steps = sorted(set(title_steps + desc_steps))
    # Counts per step, 0 where a run never produced that step count.
    title_all_counts = [title_success_counts.get(step, 0) for step in all_steps]
    desc_all_counts = [desc_success_counts.get(step, 0) for step in all_steps]
    # Bar positions: one slot per step, two bars side by side in each slot.
    x = range(len(all_steps))
    width = 0.35
    # Draw the bars.
    plt.bar([i - width/2 for i in x], title_all_counts, width, label='brief', color='blue', alpha=0.7)
    plt.bar([i + width/2 for i in x], desc_all_counts, width, label='detail', color='green', alpha=0.7)
    # Title and axis labels.
    plt.title('brief vs detail success steps distribution')
    plt.xlabel('success steps')
    plt.ylabel('URL count')
    plt.xticks(x, all_steps)
    plt.legend()
    # Print the count above each non-empty bar.
    for i, count in enumerate(title_all_counts):
        if count > 0:
            plt.text(i - width/2, count, str(count), ha='center', va='bottom')
    for i, count in enumerate(desc_all_counts):
        if count > 0:
            plt.text(i + width/2, count, str(count), ha='center', va='bottom')
    # Save the chart.
    compare_chart_path = f'{test_dir}/title_vs_desc_distribution_{timestamp}.png'
    plt.savefig(compare_chart_path)
    plt.close()
    logger.info(f"统计图表已保存: {title_chart_path}, {desc_chart_path}, {compare_chart_path}")
 # Script entry point: run main() only when this file is executed directly.
 if __name__ == "__main__":
    main()
--- a/misc/trajectory_test_v17.py
+++ b/misc/trajectory_test_v17.py
@ -0,0 +1,310 @@
 import json
 import os
 import re
 import requests
 import logging
 import base64
 from openai import OpenAI
 from dotenv import load_dotenv
 from PIL import Image
 import random
 import matplotlib.pyplot as plt
 from collections import Counter
 from datetime import datetime
 # Configure logging: INFO level with timestamp, level name and message.
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 def load_json_data(file_path):
    """
    Read a UTF-8 encoded JSON file and return the parsed value.

    Returns None (after logging the error) when the file cannot be opened
    or its content cannot be parsed.
    """
    parsed = None
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except Exception as err:
        logger.error(f"加载JSON文件失败: {err}")
    return parsed
 def encode_image_to_base64(image_path):
    """
    Read a file in binary mode and return its content as a base64 text string.

    Returns None (after logging the error) when the file cannot be read.
    """
    encoded = None
    try:
        with open(image_path, "rb") as image_file:
            raw_bytes = image_file.read()
        encoded = base64.b64encode(raw_bytes).decode('utf-8')
    except Exception as err:
        logger.error(f"图片编码失败: {err}")
    return encoded
 def call_gpt4o_mini(image_path, title, messages=None):
    """
    Ask the model, through the OpenAI SDK, for the next GUI action.

    The API base URL and key are read from the environment variables
    OPENAI_API_BASE_URL and OPENAI_API_KEY (a .env file is loaded first).

    Args:
        image_path: Path of the screenshot for the current step.
        title: Task title embedded in the user prompt.
        messages: Conversation history from previous steps, or None to start
            a new conversation with the system prompt.

    Returns:
        Always a (response_text, messages) tuple. response_text is None when
        the image cannot be encoded, the API configuration is missing, or the
        API call fails, so callers can safely unpack the result.
    """
    # Load environment variables from .env.
    load_dotenv()
    # Read and encode the screenshot.
    image_base64 = encode_image_to_base64(image_path)
    if not image_base64:
        # Return a pair: callers unpack two values even on failure.
        return None, messages
    # Read the API configuration from the environment.
    api_base = os.getenv('OPENAI_API_BASE_URL')
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_base or not api_key:
        logger.error("未找到API配置环境变量")
        return None, messages
    # Create the OpenAI client.
    client = OpenAI(
        api_key=api_key,
        base_url=api_base
    )
    # New conversation: start with the system prompt.
    if messages is None:
        messages = [
            {
                "role": "system",
                "content": """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
 ## Output Format
 ```
 Thought: ...
 Action: ...
 ```
 ## Action Space
 click(start_box='<|box_start|>(x1,y1)<|box_end|>')
 left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
 right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
 drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
 hotkey(key='')
 type(content='') #If you want to submit your input, use \"\" at the end of `content`.
 scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
 wait() #Sleep for 5s and take a screenshot to check for any changes.
 finished()
 call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
 ## Note
 - Use Chinese in `Thought` part.
 - Summarize your next action (with its target element) in one sentence in `Thought` part."""
            }
        ]
    # Build the user turn for the current step.
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": f"你的任务是'{title}'。"},
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/png;base64,{image_base64}"
                }
            }
        ]
    }
    # Append the user turn to the conversation history.
    messages.append(user_message)
    try:
        response = client.chat.completions.create(
            model="UI-TARS-72B-DPO",
            messages=messages
        )
        # Record the assistant reply in the conversation history.
        assistant_message = {
            "role": "assistant",
            "content": response.choices[0].message.content
        }
        messages.append(assistant_message)
        return response.choices[0].message.content, messages
    except Exception as e:
        logger.error(f"API调用失败: {e}")
        return None, messages
 def extract_coordinates(response_text, image_path):
    """
    Extract the first "(x,y)" pair from the model response and map it from
    the 1000x1000 coordinate space to the pixel size of the image.

    Args:
        response_text: Raw text returned by the model.
        image_path: Screenshot whose pixel size is used for the mapping.

    Returns:
        A dict {"x": ..., "y": ...} with the mapped pixel coordinates, or
        None when no pair is found or an error occurs (the error is logged).
    """
    logger.info(f"API响应: {response_text}")
    try:
        # Match "(x,y)"; optional whitespace is tolerated so that outputs
        # such as "(235, 512)" are not rejected.
        pattern = r'\(\s*(\d+)\s*,\s*(\d+)\s*\)'
        match = re.search(pattern, response_text)
        if match:
            # Coordinates in the 1000x1000 space.
            x_1000 = int(match.group(1))
            y_1000 = int(match.group(2))
            # Actual image size in pixels.
            with Image.open(image_path) as img:
                actual_width, actual_height = img.size
            # Scale to the actual image size.
            x = round(actual_width * x_1000 / 1000)
            y = round(actual_height * y_1000 / 1000)
            logger.info(f"坐标映射: 从 ({x_1000}, {y_1000}) 映射到 ({x}, {y})")
            return {"x": x, "y": y}
        return None
    except Exception as e:
        logger.error(f"坐标提取或映射失败: {e}")
        return None
 def is_point_in_box(point, box):
    """
    Return True when the point ("x", "y") lies inside the box given by
    "x", "y", "width" and "height" (edges included).
    """
    within_x = box["x"] <= point["x"] <= box["x"] + box["width"]
    if not within_x:
        return False
    return box["y"] <= point["y"] <= box["y"] + box["height"]
 def test_path(url, path_data):
    """
    Replay one recorded path step by step and check the model's clicks.

    For each step the screenshot is sent to the model together with the task
    title; the step succeeds when the returned point lies inside the recorded
    bounding box. The replay stops at the first failed step, missing
    screenshot, failed model call, or unparsable response.

    Args:
        url: URL of the path, echoed in the result and the logs.
        path_data: Path record; the first entry of "shortestPathsMeta" is used.

    Returns:
        A dict with "url", "title", "total_steps", "success_steps" and
        "is_success" (True when every step succeeded).
    """
    logger.info(f"开始测试路径: {url}")
    meta = path_data["shortestPathsMeta"][0]
    title = meta["title"]
    chain_ids = meta["chainIDs"]
    child_nums = meta["chainChildNum"]
    viewport_boxes = meta["chainViewportBoundingBoxes"]
    # A chain of N ids is evaluated as N - 1 steps.
    total_steps = len(chain_ids) - 1
    success_steps = 0
    logger.info(f"任务标题: {title}")
    logger.info(f"总步骤数: {total_steps}")
    # Conversation history; None makes the first call start a new conversation.
    messages = None
    # Walk through every step.
    for i in range(total_steps):
        current_id = chain_ids[i]
        child_num = child_nums[i] if i < len(child_nums) else None
        # Build the screenshot path, with the child number suffix when available.
        if child_num is not None:
            image_path = f"screenshots/{current_id}_{child_num}.png"
        else:
            image_path = f"screenshots/{current_id}.png"
        logger.info(f"步骤 {i+1}/{total_steps}: 处理图片 {image_path}")
        # Stop when the screenshot is missing.
        if not os.path.exists(image_path):
            logger.error(f"图片不存在: {image_path}")
            break
        # Ask the model for the click position, passing the history along.
        response, messages = call_gpt4o_mini(image_path, title, messages)
        if not response:
            logger.error("模型调用失败")
            break
        # Parse the coordinates from the response.
        coordinates = extract_coordinates(response, image_path)
        if not coordinates:
            logger.error("无法从响应中提取坐标")
            break
        logger.info(f"模型返回坐标: {coordinates}")
        # Check whether the point falls inside the recorded bounding box;
        # a missing box counts as a failed step and ends the replay.
        target_box = viewport_boxes[i] if i < len(viewport_boxes) else None
        if target_box and is_point_in_box(coordinates, target_box):
            logger.info("坐标在边界框内，步骤成功!")
            success_steps += 1
        else:
            logger.warning(f"坐标不在边界框内，步骤失败。目标边界框: {target_box}")
            break
    # Summarize the result: success requires every step to have succeeded.
    is_success = (success_steps == total_steps)
    logger.info(f"路径测试完成: 总步骤 {total_steps}, 成功步骤 {success_steps}, 路径是否完全成功: {is_success}")
    return {
        "url": url,
        "title": title,
        "total_steps": total_steps,
        "success_steps": success_steps,
        "is_success": is_success
    }
 def main():
    """
    Entry point: load the path data, test up to 10 randomly chosen paths,
    log the per-path results and save a bar chart of the success-step
    distribution as a PNG in the current working directory.
    """
    # Load the path data; abort when it is missing or empty.
    json_path = "path/processed_3_with_analysis.json"
    data = load_json_data(json_path)
    if not data:
        logger.error("数据加载失败，程序退出")
        return
    # Randomly pick 10 URLs, or use all of them when there are 10 or fewer.
    urls = list(data.keys())
    if len(urls) > 10:
        selected_urls = random.sample(urls, 10)
    else:
        selected_urls = urls
    # Collected results of every tested path.
    results = []
    # Test the selected URLs.
    for url in selected_urls:
        path_data = data[url]
        result = test_path(url, path_data)
        results.append(result)
    # Log the overall totals.
    total_paths = len(results)
    successful_paths = sum(1 for r in results if r["is_success"])
    logger.info(f"测试完成: 总路径数 {total_paths}, 成功路径数 {successful_paths}")
    # Log the details of each path.
    for result in results:
        logger.info(f"URL: {result['url']}")
        logger.info(f"  任务: {result['title']}")
        logger.info(f"  总步骤: {result['total_steps']}, 成功步骤: {result['success_steps']}")
        logger.info(f"  路径是否成功: {result['is_success']}")
    # Count how many paths reached each number of successful steps.
    success_steps_counts = Counter(result['success_steps'] for result in results)
    # Draw the bar chart.
    plt.figure(figsize=(10, 6))
    steps = sorted(success_steps_counts.keys())
    counts = [success_steps_counts[step] for step in steps]
    plt.bar(steps, counts)
    plt.title('成功步数分布')
    plt.xlabel('成功步数')
    plt.ylabel('URL数量')
    # Print the count above each bar.
    for i, count in enumerate(counts):
        plt.text(steps[i], count, str(count), ha='center', va='bottom')
    # Save the chart under a timestamped file name.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    save_path = f'success_steps_distribution_{timestamp}.png'
    plt.savefig(save_path)
    plt.close()
    logger.info(f"成功步数分布图已保存为 {save_path}")
 # Script entry point: run main() only when this file is executed directly.
 if __name__ == "__main__":
    main()
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@ -0,0 +1,14 @@
 {
  "name": "grafana-crawler",
  "version": "1.0.0",
  "type": "module",
  "dependencies": {
    "axios": "^1.8.3",
    "body-parser": "^1.20.3",
    "cors": "^2.8.5",
    "crawlee": "^3.0.0",
    "express": "^4.21.2",
    "playwright": "^1.50.1",
    "uuid": "^9.0.0"
  }
 }
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,2 @@
 python-dotenv==1.0.0
 openai==1.12.0
--- a/run_crawler.sh
+++ b/run_crawler.sh
@ -0,0 +1 @@
 node crawl_grafana_v18.js
--- a/show_path.js
+++ b/show_path.js
@ -0,0 +1,659 @@
 class PathVisualizer {
    constructor() {
        this.currentData = null;
        this.currentIndex = 0;
        this.allData = {};
        this.urls = {};  // 改为对象，每个长度存储对应的URLs
        this.currentLength = 1;  // 当前选择的路径长度
        this.currentScale = 1;
        this.translateX = 0;
        this.translateY = 0;
        this.isDragging = false;
        this.currentModalBox = null;
        this.analysisData = {}; // 存储分析数据
        this.initializeEventListeners();
        this.initializeModal();
        this.loadData();
    }
    async loadData() {
        try {
            // 加载当前长度的数据
            const response = await fetch(`path/processed_${this.currentLength}.json`);
            const data = await response.json();
            this.allData = data;
            this.urls = Object.keys(this.allData);
            // 加载分析数据
            try {
                const analysisResponse = await fetch(`path/processed_${this.currentLength}_with_analysis.json`);
                this.analysisData = await analysisResponse.json();
            } catch (error) {
                console.error('加载分析数据失败:', error);
                this.analysisData = {};
            }
            // 更新URL总数显示
            this.updateUrlCount();
            this.showPath(0);
        } catch (error) {
            console.error('加载数据失败:', error);
        }
    }
    updateUrlCount() {
        // 创建或更新URL计数显示
        let urlCountElement = document.getElementById('urlCount');
        if (!urlCountElement) {
            urlCountElement = document.createElement('div');
            urlCountElement.id = 'urlCount';
            urlCountElement.className = 'url-count';
            // 获取长度选择器的父元素，并在其后插入计数元素
            const lengthSelector = document.getElementById('pathLength');
            lengthSelector.parentNode.appendChild(urlCountElement);
        }
        // 更新计数显示
        urlCountElement.textContent = `页面数: ${this.urls.length}`;
    }
    initializeEventListeners() {
        document.getElementById('prevBtn').addEventListener('click', () => this.showPrevious());
        document.getElementById('nextBtn').addEventListener('click', () => this.showNext());
        document.getElementById('urlSearch').addEventListener('input', (e) => this.handleSearch(e));
        // 添加随机URL按钮的事件监听
        document.getElementById('randomUrl').addEventListener('click', () => this.showRandomUrl());
        // 添加路径长度选择事件
        document.getElementById('pathLength').addEventListener('change', (e) => {
            this.currentLength = parseInt(e.target.value);
            this.currentIndex = 0;  // 重置索引
            this.loadData();  // 重新加载数据
        });
    }
    initializeModal() {
        const modal = document.getElementById('imageModal');
        const modalImg = document.getElementById('modalImage');
        const zoomIn = document.getElementById('zoomIn');
        const zoomOut = document.getElementById('zoomOut');
        const zoomReset = document.getElementById('zoomReset');
        const closeModalBtn = document.getElementById('closeModal');
        // 关闭模态框
        const closeModal = () => {
            modal.style.display = 'none';
            this.currentScale = 1;
            this.translateX = 0;
            this.translateY = 0;
            this.updateModalImageScale();
        };
        closeModalBtn.onclick = closeModal;
        // 点击空白处关闭
        modal.onclick = (event) => {
            if (event.target === modal) {
                closeModal();
            }
        };
        // 鼠标拖拽
        modalImg.onmousedown = (e) => {
            this.isDragging = true;
            this.startX = e.clientX - this.translateX;
            this.startY = e.clientY - this.translateY;
            e.preventDefault();
        };
        document.onmousemove = (e) => {
            if (!this.isDragging) return;
            this.translateX = e.clientX - this.startX;
            this.translateY = e.clientY - this.startY;
            this.updateModalImageScale();
        };
        document.onmouseup = () => {
            this.isDragging = false;
        };
        // 优化滚轮缩放，以鼠标位置为中心，降低灵敏度
        modal.addEventListener('wheel', (e) => {
            e.preventDefault();
            // 获取鼠标相对于图片的位置
            const rect = modalImg.getBoundingClientRect();
            const mouseX = e.clientX - rect.left;
            const mouseY = e.clientY - rect.top;
            // 计算鼠标在图片上的相对位置（考虑当前的变换）
            const x = (mouseX - this.translateX) / this.currentScale;
            const y = (mouseY - this.translateY) / this.currentScale;
            // 计算缩放比例，降低灵敏度
            const delta = e.deltaY > 0 ? 0.95 : 1.05;  // 从0.9/1.1改为0.95/1.05
            const newScale = this.currentScale * delta;
            // 限制最大和最小缩放比例
            const limitedScale = Math.min(Math.max(newScale, 0.1), 10);  // 限制在0.1到10倍之间
            // 只有当缩放比例在限制范围内才应用变换
            if (limitedScale !== this.currentScale) {
                // 计算新的平移值，保持鼠标位置不变
                this.translateX = mouseX - x * limitedScale;
                this.translateY = mouseY - y * limitedScale;
                this.currentScale = limitedScale;
                this.updateModalImageScale();
            }
        }, { passive: false });
        // 按钮缩放
        zoomIn.onclick = () => {
            this.currentScale *= 1.2;
            this.updateModalImageScale();
        };
        zoomOut.onclick = () => {
            this.currentScale /= 1.2;
            this.updateModalImageScale();
        };
        zoomReset.onclick = () => {
            this.currentScale = 1;
            this.translateX = 0;
            this.translateY = 0;
            this.updateModalImageScale();
        };
    }
    handleSearch(event) {
        const searchTerm = event.target.value.toLowerCase();
        const foundIndex = this.urls.findIndex(url => url.toLowerCase().includes(searchTerm));
        if (foundIndex !== -1) {
            this.showPath(foundIndex);
        }
    }
    showRandomUrl() {
        if (this.urls && this.urls.length > 0) {
            // 生成随机索引
            const randomIndex = Math.floor(Math.random() * this.urls.length);
            // 显示随机选中的路径
            this.showPath(randomIndex);
        }
    }
    showPath(index) {
        if (index < 0 || index >= this.urls.length) return;
        this.currentIndex = index;
        const url = this.urls[index];
        this.currentData = this.allData[url];
        document.getElementById('currentUrl').textContent = `[长度: ${this.currentLength}] ${url}`;
        this.renderPaths();
    }
    showPrevious() {
        this.showPath(this.currentIndex - 1);
    }
    showNext() {
        this.showPath(this.currentIndex + 1);
    }
    openModal(img) {
        const modal = document.getElementById('imageModal');
        const modalImg = document.getElementById('modalImage');
        modal.style.display = 'block';
        modalImg.src = img.src;
        // 重置缩放和位置
        this.currentScale = 1;
        this.translateX = 0;
        this.translateY = 0;
        this.updateModalImageScale();
    }
    updateModalImageScale() {
        const modalImg = document.getElementById('modalImage');
        const modalBox = document.getElementById('modalBoundingBox');
        // 应用缩放和平移
        modalImg.style.transform = `translate(${this.translateX}px, ${this.translateY}px) scale(${this.currentScale})`;
        modalImg.style.transformOrigin = '0 0';  // 设置变换原点为左上角
        if (this.currentModalBox) {
            modalBox.style.transform = `translate(${this.translateX}px, ${this.translateY}px) scale(${this.currentScale})`;
            modalBox.style.transformOrigin = '0 0';
        }
    }
    async renderPaths() {
        const container = document.getElementById('pathsContainer');
        container.innerHTML = '';
        // 获取当前URL的分析数据
        const currentUrl = this.urls[this.currentIndex];
        const analysisForUrl = this.analysisData[currentUrl] || {};
        // 添加任务描述区域 - 即使没有数据也显示空白框架
        const taskContainer = document.createElement('div');
        taskContainer.className = 'task-extract-container';
        // 添加任务标题
        const titleLabel = document.createElement('div');
        titleLabel.className = 'text-label';
        titleLabel.textContent = '任务描述:';
        taskContainer.appendChild(titleLabel);
        const titleText = document.createElement('div');
        titleText.className = 'task-description';
        // 如果有数据则显示，否则留空
        if (analysisForUrl.shortestPathsMeta && analysisForUrl.shortestPathsMeta.length > 0) {
            titleText.textContent = analysisForUrl.shortestPathsMeta[0].title || '';
        }
        taskContainer.appendChild(titleText);
        // 添加过程描述
        const descLabel = document.createElement('div');
        descLabel.className = 'text-label';
        descLabel.textContent = '过程描述:';
        taskContainer.appendChild(descLabel);
        const descText = document.createElement('div');
        descText.className = 'process-description';
        // 如果有数据则显示，否则留空
        if (analysisForUrl.shortestPathsMeta && analysisForUrl.shortestPathsMeta.length > 0) {
            descText.textContent = analysisForUrl.shortestPathsMeta[0].raw_result || '';
        }
        taskContainer.appendChild(descText);
        container.appendChild(taskContainer);
        for (const path of this.currentData.shortestPaths) {
            const pathDiv = document.createElement('div');
            pathDiv.className = 'path-container';
            const meta = this.currentData.shortestPathsMeta.find(m => 
                JSON.stringify(m.chainIDs) === JSON.stringify(path));
            if (!meta) continue;
            // 添加缩略图预览区域
            const thumbnailsDiv = document.createElement('div');
            thumbnailsDiv.className = 'path-thumbnails';
            // 创建所有步骤的缩略图
            for (let i = 0; i < path.length; i++) {
                const thumbDiv = document.createElement('div');
                thumbDiv.className = 'thumbnail-container';
                // 添加步骤编号
                const stepLabel = document.createElement('div');
                stepLabel.className = 'thumbnail-step-label';
                stepLabel.textContent = `${i}`;
                thumbDiv.appendChild(stepLabel);
                // 添加缩略图，使用 chainChildNum 而不是滚动位置
                const imgPath = i === path.length - 1 
                    ? `screenshots/${path[i]}_full.png`
                    : `screenshots/${path[i]}_${meta.chainChildNum[i]}.png`;
                const thumbImg = document.createElement('img');
                thumbImg.src = imgPath;
                thumbImg.className = 'thumbnail-image';
                thumbImg.onclick = () => this.openModal(thumbImg);
                thumbDiv.appendChild(thumbImg);
                thumbnailsDiv.appendChild(thumbDiv);
            }
            pathDiv.appendChild(thumbnailsDiv);
            for (let i = 0; i < path.length; i++) {
                const stepDiv = document.createElement('div');
                stepDiv.className = 'step-container';
                const stepHeader = document.createElement('div');
                stepHeader.className = 'step-header';
                stepHeader.textContent = `步骤 ${i}`;
                stepDiv.appendChild(stepHeader);
                const urlDiv = document.createElement('div');
                urlDiv.className = 'step-url';
                urlDiv.textContent = `URL: ${meta.chainUrls[i]}`;
                stepDiv.appendChild(urlDiv);
                // 添加request ID显示和按钮区域
                const requestId = path[i];
                const requestIdDiv = document.createElement('div');
                requestIdDiv.className = 'request-id-container';
                // 显示request ID
                const requestIdLabel = document.createElement('span');
                requestIdLabel.textContent = `request id: ${requestId}`;
                requestIdDiv.appendChild(requestIdLabel);
                // 添加查看child按钮
                const viewChildBtn = document.createElement('button');
                viewChildBtn.className = 'view-btn';
                viewChildBtn.textContent = '查看child';
                viewChildBtn.onclick = () => this.viewChildJson(requestId);
                requestIdDiv.appendChild(viewChildBtn);
                // 添加查看request queue按钮
                const viewQueueBtn = document.createElement('button');
                viewQueueBtn.className = 'view-btn';
                viewQueueBtn.textContent = '查看request queue';
                viewQueueBtn.onclick = () => this.viewRequestQueueJson(requestId);
                requestIdDiv.appendChild(viewQueueBtn);
                // 添加查看axtree按钮
                const viewAxtreeBtn = document.createElement('button');
                viewAxtreeBtn.className = 'view-btn';
                viewAxtreeBtn.textContent = '查看axtree';
                viewAxtreeBtn.onclick = () => this.viewAxtree(requestId);
                requestIdDiv.appendChild(viewAxtreeBtn);
                stepDiv.appendChild(requestIdDiv);
                if (i < meta.chainTexts.length) {
                    const textDiv = document.createElement('div');
                    textDiv.className = 'path-info';
                    textDiv.textContent = `点击文本: 【${meta.chainTexts[i]}】        AxTreeID: [${meta.chainAxTreeID[i]}]`;
                    stepDiv.appendChild(textDiv);
                }
                // 构建并显示截图路径，使用 chainChildNum 而不是滚动位置
                const imgPath = i === path.length - 1 
                    ? `screenshots/${path[i]}_full.png`
                    : `screenshots/${path[i]}_${meta.chainChildNum[i]}.png`;
                const screenshotPathDiv = document.createElement('div');
                screenshotPathDiv.className = 'screenshot-path';
                screenshotPathDiv.textContent = `截图路径: ${imgPath}`;
                // 添加下载按钮
                const downloadBtn = document.createElement('button');
                downloadBtn.className = 'download-btn';
                downloadBtn.textContent = '下载截图';
                downloadBtn.onclick = () => this.downloadImage(imgPath);
                screenshotPathDiv.appendChild(downloadBtn);
                stepDiv.appendChild(screenshotPathDiv);
                const screenshotDiv = document.createElement('div');
                screenshotDiv.className = 'screenshot-container';
                const img = document.createElement('img');
                img.src = imgPath;
                img.onclick = () => this.openModal(img);
                img.onload = () => {
                    if (i < meta.chainViewportBoundingBoxes.length && i !== path.length - 1) {
                        const box = meta.chainViewportBoundingBoxes[i];
                        const boundingBox = document.createElement('div');
                        boundingBox.className = 'bounding-box';
                        const scale = img.width / img.naturalWidth;
                        boundingBox.style.left = `${box.x * scale}px`;
                        boundingBox.style.top = `${box.y * scale}px`;
                        boundingBox.style.width = `${box.width * scale}px`;
                        boundingBox.style.height = `${box.height * scale}px`;
                        screenshotDiv.appendChild(boundingBox);
                    }
                };
                screenshotDiv.appendChild(img);
                stepDiv.appendChild(screenshotDiv);
                pathDiv.appendChild(stepDiv);
            }
            container.appendChild(pathDiv);
        }
    }
    // 添加查看child JSON的方法
    async viewChildJson(requestId) {
        try {
            const response = await fetch(`path/childs/${requestId}.json`);
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            const data = await response.json();
            this.showJsonModal('Child JSON', data);
        } catch (error) {
            console.error('加载child JSON失败:', error);
            alert(`加载child JSON失败: ${error.message}`);
        }
    }
    // 添加查看request queue JSON的方法
    async viewRequestQueueJson(requestId) {
        try {
            const response = await fetch(`storage/request_queues/default/${requestId}.json`);
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            const data = await response.json();
            this.showJsonModal('Request Queue JSON', data);
        } catch (error) {
            console.error('加载request queue JSON失败:', error);
            alert(`加载request queue JSON失败: ${error.message}`);
        }
    }
    // 显示JSON模态框
    showJsonModal(title, jsonData) {
        // 检查是否已存在JSON模态框，如果不存在则创建
        let jsonModal = document.getElementById('jsonModal');
        if (!jsonModal) {
            jsonModal = document.createElement('div');
            jsonModal.id = 'jsonModal';
            jsonModal.className = 'modal';
            const modalContent = document.createElement('div');
            modalContent.className = 'modal-content json-modal-content';
            const closeBtn = document.createElement('span');
            closeBtn.className = 'close-btn';
            closeBtn.innerHTML = '&times;';
            closeBtn.onclick = () => { jsonModal.style.display = 'none'; };
            const modalTitle = document.createElement('h3');
            modalTitle.id = 'jsonModalTitle';
            const preElement = document.createElement('pre');
            preElement.id = 'jsonContent';
            preElement.className = 'json-content';
            modalContent.appendChild(closeBtn);
            modalContent.appendChild(modalTitle);
            modalContent.appendChild(preElement);
            jsonModal.appendChild(modalContent);
            document.body.appendChild(jsonModal);
            // 点击模态框外部关闭
            jsonModal.onclick = (event) => {
                if (event.target === jsonModal) {
                    jsonModal.style.display = 'none';
                }
            };
        }
        // 更新模态框内容
        document.getElementById('jsonModalTitle').textContent = title;
        // 格式化并高亮JSON
        const formattedJson = JSON.stringify(jsonData, null, 2);
        const preElement = document.getElementById('jsonContent');
        preElement.textContent = formattedJson;
        // 如果有语法高亮库（如highlight.js），可以在这里应用
        if (window.hljs) {
            preElement.innerHTML = window.hljs.highlight('json', formattedJson).value;
        }
        // 显示模态框
        jsonModal.style.display = 'block';
    }
    // 添加下载图片的方法
    async downloadImage(imgPath) {
        try {
            const response = await fetch(imgPath);
            const blob = await response.blob();
            const url = window.URL.createObjectURL(blob);
            const a = document.createElement('a');
            a.href = url;
            a.download = imgPath.split('/').pop(); // 使用原始文件名
            document.body.appendChild(a);
            a.click();
            window.URL.revokeObjectURL(url);
            document.body.removeChild(a);
        } catch (error) {
            console.error('下载图片失败:', error);
            alert('下载图片失败，请重试');
        }
    }
    // 添加查看axtree的方法
    async viewAxtree(requestId) {
        try {
            // 获取axtree文本文件
            const txtResponse = await fetch(`axtrees/${requestId}.txt`);
            let txtContent = '';
            if (txtResponse.ok) {
                txtContent = await txtResponse.text();
            } else {
                txtContent = '无法加载axtree.txt文件';
            }
            // 获取idToSelector JSON文件
            const jsonResponse = await fetch(`axtrees/${requestId}_idToSelector.json`);
            let jsonData = {};
            if (jsonResponse.ok) {
                jsonData = await jsonResponse.json();
            }
            // 显示组合的结果
            this.showAxtreeModal(requestId, txtContent, jsonData);
        } catch (error) {
            console.error('加载axtree数据失败:', error);
            alert(`加载axtree数据失败: ${error.message}`);
        }
    }
    // 显示Axtree模态框
    showAxtreeModal(requestId, txtContent, jsonData) {
        // 检查是否已存在Axtree模态框，如果不存在则创建
        let axtreeModal = document.getElementById('axtreeModal');
        if (!axtreeModal) {
            axtreeModal = document.createElement('div');
            axtreeModal.id = 'axtreeModal';
            axtreeModal.className = 'modal';
            const modalContent = document.createElement('div');
            modalContent.className = 'modal-content json-modal-content'; // 使用与JSON模态框相同的样式
            const closeBtn = document.createElement('span');
            closeBtn.className = 'close-btn';
            closeBtn.innerHTML = '&times;';
            closeBtn.onclick = () => { axtreeModal.style.display = 'none'; };
            const modalTitle = document.createElement('h3');
            modalTitle.id = 'axtreeModalTitle';
            // 创建选项卡
            const tabContainer = document.createElement('div');
            tabContainer.className = 'tab-container';
            const txtTab = document.createElement('button');
            txtTab.className = 'tab-button active';
            txtTab.textContent = 'Axtree文本';
            txtTab.onclick = () => {
                txtTab.className = 'tab-button active';
                jsonTab.className = 'tab-button';
                txtContentElement.style.display = 'block';
                jsonContentElement.style.display = 'none';
            };
            const jsonTab = document.createElement('button');
            jsonTab.className = 'tab-button';
            jsonTab.textContent = 'idToSelector JSON';
            jsonTab.onclick = () => {
                jsonTab.className = 'tab-button active';
                txtTab.className = 'tab-button';
                jsonContentElement.style.display = 'block';
                txtContentElement.style.display = 'none';
            };
            tabContainer.appendChild(txtTab);
            tabContainer.appendChild(jsonTab);
            // 创建内容区域
            const txtContentElement = document.createElement('pre');
            txtContentElement.id = 'axtreeTxtContent';
            txtContentElement.className = 'axtree-content';
            txtContentElement.style.textAlign = 'left'; // 确保文本左对齐
            txtContentElement.style.whiteSpace = 'pre'; // 保留所有空格和换行
            const jsonContentElement = document.createElement('pre');
            jsonContentElement.id = 'axtreeJsonContent';
            jsonContentElement.className = 'axtree-content';
            jsonContentElement.style.display = 'none';
            modalContent.appendChild(closeBtn);
            modalContent.appendChild(modalTitle);
            modalContent.appendChild(tabContainer);
            modalContent.appendChild(txtContentElement);
            modalContent.appendChild(jsonContentElement);
            axtreeModal.appendChild(modalContent);
            document.body.appendChild(axtreeModal);
            // 点击模态框外部关闭
            axtreeModal.onclick = (event) => {
                if (event.target === axtreeModal) {
                    axtreeModal.style.display = 'none';
                }
            };
        }
        // 更新模态框内容
        document.getElementById('axtreeModalTitle').textContent = `Axtree - ${requestId}`;
        // 更新文本内容
        const txtContentElement = document.getElementById('axtreeTxtContent');
        txtContentElement.textContent = txtContent;
        // 格式化并高亮JSON
        const formattedJson = JSON.stringify(jsonData, null, 2);
        const jsonContentElement = document.getElementById('axtreeJsonContent');
        jsonContentElement.textContent = formattedJson;
        // 如果有语法高亮库（如highlight.js），可以在这里应用
        if (window.hljs) {
            jsonContentElement.innerHTML = window.hljs.highlight('json', formattedJson).value;
        }
        // 显示模态框
        axtreeModal.style.display = 'block';
    }
 }
 // Create the visualizer once the document has finished parsing.
 document.addEventListener('DOMContentLoaded', function initPathVisualizer() {
    new PathVisualizer();
 });