run webrl chat ok
8
.gitignore
vendored
|
@ -26,3 +26,11 @@ src/server/tasks/minecraft/vab_minecraft_src/jarvis/stark_tech/MCP-Reborn
|
||||||
src/server/tasks/minecraft/vab_minecraft_src/jarvis/steveI/weights
|
src/server/tasks/minecraft/vab_minecraft_src/jarvis/steveI/weights
|
||||||
src/server/tasks/minecraft/vab_minecraft_src/jarvis/global_configs/envs/*
|
src/server/tasks/minecraft/vab_minecraft_src/jarvis/global_configs/envs/*
|
||||||
!src/server/tasks/minecraft/vab_minecraft_src/jarvis/global_configs/envs/jarvis.yaml
|
!src/server/tasks/minecraft/vab_minecraft_src/jarvis/global_configs/envs/jarvis.yaml
|
||||||
|
|
||||||
|
VAB-WebArena-Lite/config_files/wa/test_webarena
|
||||||
|
VAB-WebArena-Lite/config_files/wa/test_webarena_lite
|
||||||
|
|
||||||
|
VAB-WebArena-Lite/log_files
|
||||||
|
|
||||||
|
VAB-WebArena-Lite/results
|
||||||
|
|
||||||
|
|
1
VAB-WebArena-Lite/.auth/gitlab.reddit_state.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
{"cookies": [{"name": "_cookie_check", "value": "1", "domain": "localhost", "path": "/login", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "REMEMBERME", "value": "QXBwXEVudGl0eVxVc2VyOlRXRnlkbVZzYzBkeVlXNTBUV0Z1TVRNMjoxNzc2OTA5MzI5OlltVTFZalF5Tldaa05qaG1Oamt6WWpJMllXRTVOVEE0TW1GaFl6ZGpaV0V4WVdFMk1HSmlNamsyTjJVd01qVXdaakkwTVRSaVlURTNNRGhpTVROa1lnPT0%3D", "domain": "localhost", "path": "/", "expires": 1776909329.082441, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "PHPSESSID", "value": "c002205f152b213deaedc0800c372f3f", "domain": "localhost", "path": "/", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "known_sign_in", "value": "UlVrNWNuN1g3cDRWdjNiUmJZUVl3OGNZNmJGQmlhYll6WFN0ZDdXNFpaZnBWNk1VLzBFKyt0eDJWQ2F1VkhkUndNR1hjeHIybnNzYlY5RC8wbFVWejJrclpUSUZZR2g1T0ZJQTRVVlZMUGVjKzdiMmhWVk1yR2czb1pzYXAzQWstLW8vYnlFdTlMYmZHUXBTZENLRDVBNlE9PQ%3D%3D--4b1888d4ebbdb533cf162356c2630ea53dd95286", "domain": "localhost", "path": "/", "expires": 1746582930.098554, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "_gitlab_session", "value": "037c2d46106a9cea55da705dce5b5708", "domain": "localhost", "path": "/", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": []}
|
1
VAB-WebArena-Lite/.auth/gitlab.shopping_admin_state.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
{"cookies": [{"name": "admin", "value": "1087c582d275f78834da637c08d2dd0c", "domain": "localhost", "path": "/admin", "expires": 1745733329.394644, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "known_sign_in", "value": "UW1ZRyt2cTd2VmltKzMzWGVDOWovVi9HUkwyU2lYQ3BteUdGNUFyNHpNNHpMVldsWEdTZnJ6TDM5ekpxaDFpSTNGTXAvWTBDOGtWRUp6YWlWbE5UdEJXZzM5a0FvUGQ2V0c5dHJib1J6b0VrNWhVQTgvOTZxTnVtd3NHMjBWQTktLXJub3VnR0RISHFkV1Y2QmtzRlg3Nnc9PQ%3D%3D--20fe4b8ba28a2bb065bc6691d0cc2733e25633c4", "domain": "localhost", "path": "/", "expires": 1746582930.401271, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "_gitlab_session", "value": "f98c1667e8eacdb4484308adbc9212fd", "domain": "localhost", "path": "/", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": []}
|
1
VAB-WebArena-Lite/.auth/gitlab.shopping_state.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
{"cookies": [{"name": "mage-cache-storage", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "mage-cache-storage-section-invalidation", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "mage-messages", "value": "", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Strict"}, {"name": "recently_viewed_product", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_viewed_product_previous", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_compared_product", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_compared_product_previous", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "product_data_storage", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "section_data_ids", "value": "{%22messages%22:null%2C%22customer%22:null%2C%22compare-products%22:null%2C%22last-ordered-items%22:null%2C%22cart%22:null%2C%22directory-data%22:null%2C%22captcha%22:null%2C%22instant-purchase%22:null%2C%22loggedAsCustomer%22:null%2C%22persistent%22:null%2C%22review%22:null%2C%22wishlist%22:null%2C%22recently_viewed_product%22:null%2C%22recently_compared_product%22:null%2C%22product_data_storage%22:null%2C%22paypal-billing-agreement%22:null}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": 
"private_content_version", "value": "5a1361e59223072bebd2793394207496", "domain": "localhost", "path": "/", "expires": 1779933329.571442, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "PHPSESSID", "value": "177a7b68734a2083018b3fb119634917", "domain": "localhost", "path": "/", "expires": 1776909330.114376, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "X-Magento-Vary", "value": "9bf9a599123e6402b85cde67144717a08b817412", "domain": "localhost", "path": "/", "expires": 1776909330.114451, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "known_sign_in", "value": "VjEvQms0R1Y0WFJwNzhWcXJmWUQzTlM5aEZhS1FLd3pIaGcrRHZoUUMxb2RtK2gzWFVMK1N1YjA0QTJiV1NPL1NwQWYrc3dpRlU3U3Q1Q3p4T0RNbWd4UVl2Vk5pR2dSbVBFTUhrQUhQMnBJOXlxaGVsdG5ycXFkVDQ2dDBBejYtLWNYZ3pJREcxbkVXWjVsZE9LMnIvZkE9PQ%3D%3D--860cb1881ab774ae0647d031b6a58789ca2dc43f", "domain": "localhost", "path": "/", "expires": 1746582931.071908, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "_gitlab_session", "value": "23ff9045fd7ed259aa91a5056469be23", "domain": "localhost", "path": "/", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": [{"origin": "http://localhost:28082", "localStorage": [{"name": "mage-cache-storage", "value": "{}"}, {"name": "product_data_storage", "value": "{}"}, {"name": "mage-cache-storage-section-invalidation", "value": "{\"messages\":true,\"customer\":true,\"compare-products\":true,\"last-ordered-items\":true,\"cart\":true,\"directory-data\":true,\"captcha\":true,\"instant-purchase\":true,\"loggedAsCustomer\":true,\"persistent\":true,\"review\":true,\"wishlist\":true,\"recently_viewed_product\":true,\"recently_compared_product\":true,\"product_data_storage\":true,\"paypal-billing-agreement\":true}"}, {"name": "mage-cache-timeout", "value": "\"2026-04-23T01:55:28.860Z\""}, {"name": "recently_compared_product_previous", "value": "{}"}, {"name": "recently_viewed_product", "value": "{}"}, {"name": 
"recently_compared_product", "value": "{}"}, {"name": "recently_viewed_product_previous", "value": "{}"}]}]}
|
1
VAB-WebArena-Lite/.auth/gitlab_state.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
{"cookies": [{"name": "known_sign_in", "value": "WHpKbmQyNkhlWWhHcGljQmZmVGE2aFl0WFh3VWxGMTRRYjBsa3dNbVJZLzIrYWM5RUpwa0IxYU85Q291WTE5cE9SWVcvdC9xTjNBTFp2ZHdCTGxaaEx6RXpDVm0zS1BCVldyODdMYlZuTWRHNFF0K0JQZXVqcGhPWWVkMWZWMmktLVNCc3F6VlFFQjBFSGN0TnlKUnd2MEE9PQ%3D%3D--752f2d15a69617f851d3140acb5500b2564af08f", "domain": "localhost", "path": "/", "expires": 1746582928.781901, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "_gitlab_session", "value": "85d65f4d47be2142eee2a21e8bbcec79", "domain": "localhost", "path": "/", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": []}
|
1
VAB-WebArena-Lite/.auth/reddit_state.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
{"cookies": [{"name": "_cookie_check", "value": "1", "domain": "localhost", "path": "/login", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "REMEMBERME", "value": "QXBwXEVudGl0eVxVc2VyOlRXRnlkbVZzYzBkeVlXNTBUV0Z1TVRNMjoxNzc2OTA5MzI4OlpUQTRNV1F3WldNMU1HSmhOalUwTTJaaE1qWmxOVGs1TldNMlpUVTFNVEUyT1RRNFpqTXdaR1F3TVRRelpUTXhPVEl3TjJSa09USm1PREF4Wm1WaE5nPT0%3D", "domain": "localhost", "path": "/", "expires": 1776909328.956723, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "PHPSESSID", "value": "4a07cf057d5020f791451af6c2bc29e0", "domain": "localhost", "path": "/", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": []}
|
|
@ -0,0 +1 @@
|
||||||
|
{"cookies": [{"name": "admin", "value": "732e6b7c1a7ab1de0d1a4ab105b549b6", "domain": "localhost", "path": "/admin", "expires": 1745733331.781848, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "mage-cache-storage", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "mage-cache-storage-section-invalidation", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "mage-messages", "value": "", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Strict"}, {"name": "recently_viewed_product", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_viewed_product_previous", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_compared_product", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_compared_product_previous", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "product_data_storage", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "section_data_ids", "value": 
"{%22messages%22:null%2C%22customer%22:null%2C%22compare-products%22:null%2C%22last-ordered-items%22:null%2C%22cart%22:null%2C%22directory-data%22:null%2C%22captcha%22:null%2C%22instant-purchase%22:null%2C%22loggedAsCustomer%22:null%2C%22persistent%22:null%2C%22review%22:null%2C%22wishlist%22:null%2C%22recently_viewed_product%22:null%2C%22recently_compared_product%22:null%2C%22product_data_storage%22:null%2C%22paypal-billing-agreement%22:null}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "private_content_version", "value": "c694b83ea1426b503effa8c412f7ae6d", "domain": "localhost", "path": "/", "expires": 1779933329.584947, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "PHPSESSID", "value": "ac8e0a869234bb4b31a917ff6a016ce9", "domain": "localhost", "path": "/", "expires": 1776909330.149523, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "X-Magento-Vary", "value": "9bf9a599123e6402b85cde67144717a08b817412", "domain": "localhost", "path": "/", "expires": 1776909330.149613, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": [{"origin": "http://localhost:28082", "localStorage": [{"name": "mage-cache-storage", "value": "{}"}, {"name": "product_data_storage", "value": "{}"}, {"name": "mage-cache-storage-section-invalidation", "value": "{\"messages\":true,\"customer\":true,\"compare-products\":true,\"last-ordered-items\":true,\"cart\":true,\"directory-data\":true,\"captcha\":true,\"instant-purchase\":true,\"loggedAsCustomer\":true,\"persistent\":true,\"review\":true,\"wishlist\":true,\"recently_viewed_product\":true,\"recently_compared_product\":true,\"product_data_storage\":true,\"paypal-billing-agreement\":true}"}, {"name": "mage-cache-timeout", "value": "\"2026-04-23T01:55:28.879Z\""}, {"name": "recently_compared_product_previous", "value": "{}"}, {"name": "recently_viewed_product", "value": "{}"}, {"name": "recently_compared_product", 
"value": "{}"}, {"name": "recently_viewed_product_previous", "value": "{}"}]}]}
|
1
VAB-WebArena-Lite/.auth/shopping_admin_state.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
{"cookies": [{"name": "admin", "value": "753cf7bedc7f97223f77af0369ec4ccf", "domain": "localhost", "path": "/admin", "expires": 1745733329.396607, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": []}
|
1
VAB-WebArena-Lite/.auth/shopping_state.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
{"cookies": [{"name": "mage-cache-storage", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "mage-cache-storage-section-invalidation", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "mage-messages", "value": "", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Strict"}, {"name": "recently_viewed_product", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_viewed_product_previous", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_compared_product", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_compared_product_previous", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "product_data_storage", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "section_data_ids", "value": "{%22messages%22:null%2C%22customer%22:null%2C%22compare-products%22:null%2C%22last-ordered-items%22:null%2C%22cart%22:null%2C%22directory-data%22:null%2C%22captcha%22:null%2C%22instant-purchase%22:null%2C%22loggedAsCustomer%22:null%2C%22persistent%22:null%2C%22review%22:null%2C%22wishlist%22:null%2C%22recently_viewed_product%22:null%2C%22recently_compared_product%22:null%2C%22product_data_storage%22:null%2C%22paypal-billing-agreement%22:null}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": 
"private_content_version", "value": "db7c8357df43bdda4423b1d0be6cd1c0", "domain": "localhost", "path": "/", "expires": 1779933329.561383, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "PHPSESSID", "value": "9d34565b19e00a4310aba625db3a7fd7", "domain": "localhost", "path": "/", "expires": 1776909330.076342, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "X-Magento-Vary", "value": "9bf9a599123e6402b85cde67144717a08b817412", "domain": "localhost", "path": "/", "expires": 1776909330.076442, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": [{"origin": "http://localhost:28082", "localStorage": [{"name": "mage-cache-storage", "value": "{}"}, {"name": "product_data_storage", "value": "{}"}, {"name": "mage-cache-storage-section-invalidation", "value": "{\"messages\":true,\"customer\":true,\"compare-products\":true,\"last-ordered-items\":true,\"cart\":true,\"directory-data\":true,\"captcha\":true,\"instant-purchase\":true,\"loggedAsCustomer\":true,\"persistent\":true,\"review\":true,\"wishlist\":true,\"recently_viewed_product\":true,\"recently_compared_product\":true,\"product_data_storage\":true,\"paypal-billing-agreement\":true}"}, {"name": "mage-cache-timeout", "value": "\"2026-04-23T01:55:28.896Z\""}, {"name": "recently_compared_product_previous", "value": "{}"}, {"name": "recently_viewed_product", "value": "{}"}, {"name": "recently_compared_product", "value": "{}"}, {"name": "recently_viewed_product_previous", "value": "{}"}]}]}
|
6
VAB-WebArena-Lite/CITATION.cff
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
@article{koh2024visualwebarena,
|
||||||
|
title={VisualWebArena: Evaluating Multimodal Agents on Realistic Visual Web Tasks},
|
||||||
|
author={Koh, Jing Yu and Lo, Robert and Jang, Lawrence and Duvvur, Vikram and Lim, Ming Chong and Huang, Po-Yu and Neubig, Graham and Zhou, Shuyan and Salakhutdinov, Ruslan and Fried, Daniel},
|
||||||
|
journal={arXiv preprint arXiv:2401.13649},
|
||||||
|
year={2024}
|
||||||
|
}
|
20
VAB-WebArena-Lite/LICENSE
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
Copyright (c) 2024 Jing Yu Koh, Robert Lo, Lawrence Jang, Vikram Duvvur, Ming Chong Lim, and Po-Yu Huang
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||||
|
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||||
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||||
|
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
8
VAB-WebArena-Lite/agent/__init__.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
from .agent import (
|
||||||
|
Agent,
|
||||||
|
PromptAgent,
|
||||||
|
TeacherForcingAgent,
|
||||||
|
construct_agent,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = ["Agent", "TeacherForcingAgent", "PromptAgent", "construct_agent"]
|
1
VAB-WebArena-Lite/agent/prompts/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
from .prompt_constructor import *
|
|
@ -0,0 +1,27 @@
|
||||||
|
{
|
||||||
|
"intro": "You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.\n\nHere's the information you'll have:\nThe user's objective: This is the task you're trying to complete.\nThe current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.\nThe current web page's URL: This is the page you're currently navigating.\nThe open tabs: These are the tabs you have open.\nThe previous action: This is the action you just performed. It may be helpful to track your progress.\n\nThe actions you can perform fall into several categories:\n\nPage Operation Actions:\n`click [id]`: This action clicks on an element with a specific id on the webpage.\n`type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the \"Enter\" key is pressed after typing unless press_enter_after is set to 0.\n`hover [id]`: Hover over an element with id.\n`press [key_comb]`: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).\n`scroll [direction=down|up]`: Scroll the page up or down.\n\nTab Management Actions:\n`new_tab`: Open a new, empty browser tab.\n`tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index.\n`close_tab`: Close the currently active tab.\n\nURL Navigation Actions:\n`goto [url]`: Navigate to a specific URL.\n`go_back`: Navigate to the previously viewed page.\n`go_forward`: Navigate to the next page (if a previous 'go_back' action was performed).\n\nCompletion Action:\n`stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket. 
If you believe the task is impossible to complete, provide the answer as \"N/A\" in the bracket.\n\nHomepage:\nIf you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.\nhttp://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.\n\nTo be successful, it is very important to follow the following rules:\n1. You should only issue an action that is valid given the current observation\n2. You should only issue one action at a time.\n3. You should follow the examples to reason step by step and then issue the next action.\n4. Generate the action in the correct format. Start with a \"In summary, the next action I will perform is\" phrase, followed by action inside ``````. For example, \"In summary, the next action I will perform is ```click [1234]```\".\n5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.",
|
||||||
|
"examples": [
|
||||||
|
[
|
||||||
|
"OBSERVATION:\n[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'\n\t\t[1749] StaticText '$279.49'\n\t\t[1757] button 'Add to Cart'\n\t\t[1760] button 'Add to Wish List'\n\t\t[1761] button 'Add to Compare'\nURL: http://onestopmarket.com/office-products/office-electronics.html\nOBJECTIVE: What is the price of HP Inkjet Fax Machine\nPREVIOUS ACTION: None",
|
||||||
|
"Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"OBSERVATION:\n[164] textbox 'Search' focused: True required: False\n[171] button 'Go'\n[174] link 'Find directions between two points'\n[212] heading 'Search Results'\n[216] button 'Close'\nURL: http://openstreetmap.org\nOBJECTIVE: Show me the restaurants near CMU\nPREVIOUS ACTION: None",
|
||||||
|
"Let's think step-by-step. This page has a search box whose ID is [164]. According to the nominatim rule of openstreetmap, I can search for the restaurants near a location by \"restaurants near\". I can submit my typing by pressing the Enter afterwards. In summary, the next action I will perform is ```type [164] [restaurants near CMU] [1]```"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"template": "OBSERVATION:\n{observation}\nURL: {url}\nOBJECTIVE: {objective}\nPREVIOUS ACTION: {previous_action}",
|
||||||
|
"meta_data": {
|
||||||
|
"observation": "accessibility_tree",
|
||||||
|
"action_type": "id_accessibility_tree",
|
||||||
|
"keywords": [
|
||||||
|
"url",
|
||||||
|
"objective",
|
||||||
|
"observation",
|
||||||
|
"previous_action"
|
||||||
|
],
|
||||||
|
"prompt_constructor": "CoTPromptConstructor",
|
||||||
|
"answer_phrase": "In summary, the next action I will perform is",
|
||||||
|
"action_splitter": "```"
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,27 @@
|
||||||
|
{
|
||||||
|
"intro": "You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.\n\nHere's the information you'll have:\nThe user's objective: This is the task you're trying to complete.\nThe current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.\nThe current web page's URL: This is the page you're currently navigating.\nThe open tabs: These are the tabs you have open.\nThe previous action: This is the action you just performed. It may be helpful to track your progress.\n\nThe actions you can perform fall into several categories:\n\nPage Operation Actions:\n`click [id]`: This action clicks on an element with a specific id on the webpage.\n`type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the \"Enter\" key is pressed after typing unless press_enter_after is set to 0.\n`hover [id]`: Hover over an element with id.\n`press [key_comb]`: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).\n`scroll [direction=down|up]`: Scroll the page up or down.\n\nTab Management Actions:\n`new_tab`: Open a new, empty browser tab.\n`tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index.\n`close_tab`: Close the currently active tab.\n\nURL Navigation Actions:\n`goto [url]`: Navigate to a specific URL.\n`go_back`: Navigate to the previously viewed page.\n`go_forward`: Navigate to the next page (if a previous 'go_back' action was performed).\n\nCompletion Action:\n`stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.\n\nHomepage:\nIf you want to visit other websites, check out the homepage at http://homepage.com. 
It has a list of websites you can visit.\nhttp://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.\n\nTo be successful, it is very important to follow the following rules:\n1. You should only issue an action that is valid given the current observation\n2. You should only issue one action at a time.\n3. You should follow the examples to reason step by step and then issue the next action.\n4. Generate the action in the correct format. Start with a \"In summary, the next action I will perform is\" phrase, followed by action inside ``````. For example, \"In summary, the next action I will perform is ```click [1234]```\".\n5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.",
|
||||||
|
"examples": [
|
||||||
|
[
|
||||||
|
"OBSERVATION:\n[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'\n\t\t[1749] StaticText '$279.49'\n\t\t[1757] button 'Add to Cart'\n\t\t[1760] button 'Add to Wish List'\n\t\t[1761] button 'Add to Compare'\nURL: http://onestopmarket.com/office-products/office-electronics.html\nOBJECTIVE: What is the price of HP Inkjet Fax Machine\nPREVIOUS ACTION: None",
|
||||||
|
"Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"OBSERVATION:\n[164] textbox 'Search' focused: True required: False\n[171] button 'Go'\n[174] link 'Find directions between two points'\n[212] heading 'Search Results'\n[216] button 'Close'\nURL: http://openstreetmap.org\nOBJECTIVE: Show me the restaurants near CMU\nPREVIOUS ACTION: None",
|
||||||
|
"Let's think step-by-step. This page has a search box whose ID is [164]. According to the nominatim rule of openstreetmap, I can search for the restaurants near a location by \"restaurants near\". I can submit my typing by pressing the Enter afterwards. In summary, the next action I will perform is ```type [164] [restaurants near CMU] [1]```"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"template": "OBSERVATION:\n{observation}\nURL: {url}\nOBJECTIVE: {objective}\nPREVIOUS ACTION: {previous_action}",
|
||||||
|
"meta_data": {
|
||||||
|
"observation": "accessibility_tree",
|
||||||
|
"action_type": "id_accessibility_tree",
|
||||||
|
"keywords": [
|
||||||
|
"url",
|
||||||
|
"objective",
|
||||||
|
"observation",
|
||||||
|
"previous_action"
|
||||||
|
],
|
||||||
|
"prompt_constructor": "CoTPromptConstructor",
|
||||||
|
"answer_phrase": "In summary, the next action I will perform is",
|
||||||
|
"action_splitter": "```"
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
{
|
||||||
|
"intro": "You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.\n\nHere's the information you'll have:\nThe user's objective: This is the task you're trying to complete.\nThe current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.\nThe current web page's URL: This is the page you're currently navigating.\nThe open tabs: These are the tabs you have open.\nThe previous action: This is the action you just performed. It may be helpful to track your progress.\n\nThe actions you can perform fall into several categories:\n\nPage Operation Actions:\n```click [id]```: This action clicks on an element with a specific id on the webpage.\n```type [id] [content]```: Use this to type the content into the field with id. By default, the \"Enter\" key is pressed after typing unless press_enter_after is set to 0, i.e., ```type [id] [content] [0]```.\n```hover [id]```: Hover over an element with id.\n```press [key_comb]```: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).\n```scroll [down]``` or ```scroll [up]```: Scroll the page up or down.\n\nTab Management Actions:\n```new_tab```: Open a new, empty browser tab.\n```tab_focus [tab_index]```: Switch the browser's focus to a specific tab using its index.\n```close_tab```: Close the currently active tab.\n\nURL Navigation Actions:\n```goto [url]```: Navigate to a specific URL.\n```go_back```: Navigate to the previously viewed page.\n```go_forward```: Navigate to the next page (if a previous 'go_back' action was performed).\n\nCompletion Action:\n```stop [answer]```: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.\n\nHomepage:\nIf you want to visit other websites, check out the homepage at http://homepage.com. 
It has a list of websites you can visit.\nhttp://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.\n\nTo be successful, it is very important to follow the following rules:\n1. You should only issue an action that is valid given the current observation\n2. You should only issue one action at a time.\n3. You should follow the examples to reason step by step and then issue the next action.\n4. Generate the action in the correct format. Start with a \"In summary, the next action I will perform is\" phrase, followed by action inside ``````. For example, \"In summary, the next action I will perform is ```click [1234]```\".\n5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.",
|
||||||
|
"examples": [
|
||||||
|
[
|
||||||
|
"OBSERVATION:\n[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'\n[1749] StaticText '$279.49'\n[1757] button 'Add to Cart'\n[1760] button 'Add to Wish List'\n[1761] button 'Add to Compare'\nURL: http://onestopmarket.com/office-products/office-electronics.html\nOBJECTIVE: What is the price of HP Inkjet Fax Machine?\nPREVIOUS ACTION: None",
|
||||||
|
"Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"OBSERVATION:\n[204] heading '/f/food'\n[593] heading '[homemade] Obligatory Halloween Pumpkin Loaf!'\n\t[942] link '[homemade] Obligatory Halloween Pumpkin Loaf!'\n[945] StaticText 'Submitted by '\n[30] link 'kneechalice' expanded: False\n[1484] StaticText 't3_yid9lu'\n[949] time 'October 31, 2022 at 10:10:03 AM EDT'\n\t[1488] StaticText '1 year ago'\n[1489] link '45 comments'\n[605] heading '[I ate] Maple Pecan Croissant'\n\t[963] link '[I ate] Maple Pecan Croissant'\n[966] StaticText 'Submitted by '\n[37] link 'AccordingtoJP' expanded: False\n[1494] StaticText 't3_y3hrpn'\n[970] time 'October 13, 2022 at 10:41:09 PM EDT'\n\t[1498] StaticText '1 year ago'\n[1499] link '204 comments'\nURL: http://reddit.com\nOBJECTIVE: Tell me what the top comment on the croissant post says.\nPREVIOUS ACTION: None",
|
||||||
|
"Let's think step-by-step. This page has a post titled '[I ate] Maple Pecan Croissant', which is the post mentioned in the objective. In order to find the top comment, I will navigate into the comments section of the post. In summary, the next action I will perform is ```click [1499]```"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"OBSERVATION:\n[42] link 'My account'\n[43] link 'Logout'\n[44] link 'Publish Ad'\n[25] heading 'What are you looking for today?'\n[143] StaticText 'Keyword'\n[81] textbox 'e.g., a blue used car' required: False\n[146] StaticText 'Category'\n[28] heading 'Latest Listings'\n[86] link 'Atlas Powered Audio System w/ Tripod'\n\t[176] img 'Atlas Powered Audio System w/ Tripod'\n[511] StaticText '150.00 $'\n[88] link 'Neptune Gaming Console'\n\t[178] img 'Neptune Gaming Console'\n[515] StaticText '350.00 $'\nURL: http://classifieds.com\nOBJECTIVE: Help me find the cheapest dark colored guitar.\nPREVIOUS ACTION: None",
|
||||||
|
"Let's think step-by-step. The objective is to find the cheapest dark colored guitar on the site. The site has a search box whose ID is [81]. I can search for guitars by entering \"guitar\". I can submit this by pressing the Enter afterwards. In summary, the next action I will perform is ```type [81] [guitar] [1]```"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"template": "OBSERVATION:\n{observation}\nURL: {url}\nOBJECTIVE: {objective}\nPREVIOUS ACTION: {previous_action}",
|
||||||
|
"meta_data": {
|
||||||
|
"observation": "accessibility_tree",
|
||||||
|
"action_type": "id_accessibility_tree",
|
||||||
|
"keywords": [
|
||||||
|
"url",
|
||||||
|
"objective",
|
||||||
|
"observation",
|
||||||
|
"previous_action"
|
||||||
|
],
|
||||||
|
"prompt_constructor": "CoTPromptConstructor",
|
||||||
|
"answer_phrase": "In summary, the next action I will perform is",
|
||||||
|
"action_splitter": "```"
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
{
|
||||||
|
"intro": "You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.\n\nHere's the information you'll have:\nThe user's objective: This is the task you're trying to complete.\nThe current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.\nThe current web page's URL: This is the page you're currently navigating.\nThe open tabs: These are the tabs you have open.\nThe previous action: This is the action you just performed. It may be helpful to track your progress.\n\nThe actions you can perform fall into several categories:\n\nPage Operation Actions:\n```click [id]```: This action clicks on an element with a specific id on the webpage.\n```type [id] [content]```: Use this to type the content into the field with id. By default, the \"Enter\" key is pressed after typing unless press_enter_after is set to 0, i.e., ```type [id] [content] [0]```.\n```hover [id]```: Hover over an element with id.\n```press [key_comb]```: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).\n```scroll [down]``` or ```scroll [up]```: Scroll the page up or down.\n\nTab Management Actions:\n```new_tab```: Open a new, empty browser tab.\n```tab_focus [tab_index]```: Switch the browser's focus to a specific tab using its index.\n```close_tab```: Close the currently active tab.\n\nURL Navigation Actions:\n```goto [url]```: Navigate to a specific URL.\n```go_back```: Navigate to the previously viewed page.\n```go_forward```: Navigate to the next page (if a previous 'go_back' action was performed).\n\nCompletion Action:\n```stop [answer]```: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.\n\nHomepage:\nIf you want to visit other websites, check out the homepage at http://homepage.com. 
It has a list of websites you can visit.\nhttp://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.\n\nTo be successful, it is very important to follow the following rules:\n1. You should only issue an action that is valid given the current observation\n2. You should only issue one action at a time.\n3. You should follow the examples to reason step by step and then issue the next action.\n4. Generate the action in the correct format. Start with a \"In summary, the next action I will perform is\" phrase, followed by action inside ``````. For example, \"In summary, the next action I will perform is ```click [1234]```\".\n5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.",
|
||||||
|
"examples": [
|
||||||
|
[
|
||||||
|
"OBSERVATION:\n[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'\n[1749] StaticText '$279.49'\n[1757] button 'Add to Cart'\n[1760] button 'Add to Wish List'\n[1761] button 'Add to Compare'\nURL: http://onestopmarket.com/office-products/office-electronics.html\nOBJECTIVE: What is the price of HP Inkjet Fax Machine?\nPREVIOUS ACTION: None",
|
||||||
|
"Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```",
|
||||||
|
"agent/prompts/multimodal_examples/multimodal_example1.png"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"OBSERVATION:\n[204] heading '/f/food'\n[593] heading '[homemade] Obligatory Halloween Pumpkin Loaf!'\n\t[942] link '[homemade] Obligatory Halloween Pumpkin Loaf!'\n[945] StaticText 'Submitted by '\n[30] link 'kneechalice' expanded: False\n[1484] StaticText 't3_yid9lu'\n[949] time 'October 31, 2022 at 10:10:03 AM EDT'\n\t[1488] StaticText '1 year ago'\n[1489] link '45 comments'\n[605] heading '[I ate] Maple Pecan Croissant'\n\t[963] link '[I ate] Maple Pecan Croissant'\n[966] StaticText 'Submitted by '\n[37] link 'AccordingtoJP' expanded: False\n[1494] StaticText 't3_y3hrpn'\n[970] time 'October 13, 2022 at 10:41:09 PM EDT'\n\t[1498] StaticText '1 year ago'\n[1499] link '204 comments'\nURL: http://reddit.com\nOBJECTIVE: Tell me what the top comment on the croissant post says.\nPREVIOUS ACTION: None",
|
||||||
|
"Let's think step-by-step. This page has a post titled '[I ate] Maple Pecan Croissant', which is the post mentioned in the objective. In order to find the top comment, I will navigate into the comments section of the post. In summary, the next action I will perform is ```click [1499]```",
|
||||||
|
"agent/prompts/multimodal_examples/multimodal_example2.png"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"OBSERVATION:\n[42] link 'My account'\n[43] link 'Logout'\n[44] link 'Publish Ad'\n[25] heading 'What are you looking for today?'\n[143] StaticText 'Keyword'\n[81] textbox 'e.g., a blue used car' required: False\n[146] StaticText 'Category'\n[28] heading 'Latest Listings'\n[86] link 'Atlas Powered Audio System w/ Tripod'\n\t[176] img 'Atlas Powered Audio System w/ Tripod'\n[511] StaticText '150.00 $'\n[88] link 'Neptune Gaming Console'\n\t[178] img 'Neptune Gaming Console'\n[515] StaticText '350.00 $'\nURL: http://classifieds.com\nOBJECTIVE: Help me find the cheapest dark colored guitar.\nPREVIOUS ACTION: None",
|
||||||
|
"Let's think step-by-step. The objective is to find the cheapest dark colored guitar on the site. The site has a search box whose ID is [81]. I can search for guitars by entering \"guitar\". I can submit this by pressing the Enter afterwards. In summary, the next action I will perform is ```type [81] [guitar] [1]```",
|
||||||
|
"agent/prompts/multimodal_examples/multimodal_example3.png"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"template": "OBSERVATION:\n{observation}\nURL: {url}\nOBJECTIVE: {objective}\nPREVIOUS ACTION: {previous_action}",
|
||||||
|
"meta_data": {
|
||||||
|
"observation": "accessibility_tree",
|
||||||
|
"action_type": "id_accessibility_tree",
|
||||||
|
"keywords": [
|
||||||
|
"url",
|
||||||
|
"objective",
|
||||||
|
"observation",
|
||||||
|
"previous_action"
|
||||||
|
],
|
||||||
|
"prompt_constructor": "MultimodalCoTPromptConstructor",
|
||||||
|
"answer_phrase": "In summary, the next action I will perform is",
|
||||||
|
"action_splitter": "```"
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
{
|
||||||
|
"intro": "You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.\n\nHere's the information you'll have:\nThe user's objective: This is the task you're trying to complete.\nThe current web page screenshot: This is a screenshot of the webpage, with each interactable element assigned a unique numerical id. Each bounding box and its respective id shares the same color.\nThe observation, which lists the IDs of all interactable elements on the current web page with their text content if any, in the format [id] [tagType] [text content]. tagType is the type of the element, such as button, link, or textbox. text content is the text content of the element. For example, [1234] [button] ['Add to Cart'] means that there is a button with id 1234 and text content 'Add to Cart' on the current web page. [] [StaticText] [text] means that the element is of some text that is not interactable.\nThe current web page's URL: This is the page you're currently navigating.\nThe open tabs: These are the tabs you have open.\nThe previous action: This is the action you just performed. It may be helpful to track your progress.\n\nThe actions you can perform fall into several categories:\n\nPage Operation Actions:\n```click [id]```: This action clicks on an element with a specific id on the webpage.\n```type [id] [content]```: Use this to type the content into the field with id. 
By default, the \"Enter\" key is pressed after typing unless press_enter_after is set to 0, i.e., ```type [id] [content] [0]```.\n```hover [id]```: Hover over an element with id.\n```press [key_comb]```: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).\n```scroll [down]``` or ```scroll [up]```: Scroll the page up or down.\n\nTab Management Actions:\n```new_tab```: Open a new, empty browser tab.\n```tab_focus [tab_index]```: Switch the browser's focus to a specific tab using its index.\n```close_tab```: Close the currently active tab.\n\nURL Navigation Actions:\n```goto [url]```: Navigate to a specific URL.\n```go_back```: Navigate to the previously viewed page.\n```go_forward```: Navigate to the next page (if a previous 'go_back' action was performed).\n\nCompletion Action:\n```stop [answer]```: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.\n\nHomepage:\nIf you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.\nhttp://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.\n\nTo be successful, it is very important to follow the following rules:\n1. You should only issue an action that is valid given the current observation\n2. You should only issue one action at a time.\n3. You should follow the examples to reason step by step and then issue the next action.\n4. Generate the action in the correct format. Start with a \"In summary, the next action I will perform is\" phrase, followed by action inside ``````. For example, \"In summary, the next action I will perform is ```click [1234]```\".\n5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.",
|
||||||
|
"examples": [
|
||||||
|
[
|
||||||
|
"OBSERVATION:\n[31] [IMG] [Image, description: hp fx-7010dn fax machine, url: http://ec2-3-13-232-171.us-east-2.compute.amazonaws.com:7770/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B08GKZ3ZKD.0.jpg]\n[32] [A] [HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)]\n[] [StaticText] [$279.49]\n[33] [BUTTON] [Add to Cart]\n[34] [A] [Add to Wish List]\n[35] [A] [Add to Compare]\nURL: http://onestopmarket.com/office-products/office-electronics.html\nOBJECTIVE: What is the price of HP Inkjet Fax Machine?\nPREVIOUS ACTION: None",
|
||||||
|
"Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```",
|
||||||
|
"agent/prompts/som_examples/som_example1.png"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"OBSERVATION:\n[] [StaticText] [/f/food]\n[] [StaticText] [[homemade] Obligatory Halloween Pumpkin Loaf!\tSubmitted by\tkneechalice\tt3_yid9lu\t1 year ago]\n[9] [IMG] []\n[] [StaticText] [Submitted by\tkneechalice\tt3_yid9lu\t1 year ago]\n[10] [A] [kneechalice]\n[11] [A] [45 comments]\n[] [StaticText] [[I ate] Maple Pecan Croissant\tSubmitted by\tAccordingtoJP\tt3_y3hrpn\t1 year ago]\n[14] [IMG] []\n[] [StaticText] [Submitted by\tAccordingtoJP\tt3_y3hrpn\t1 year ago]\n[15] [A] [AccordingtoJP]\n[16] [A] [204 comments]\nURL: http://reddit.com\nOBJECTIVE: Tell me what the top comment on the croissant post says.\nPREVIOUS ACTION: None",
|
||||||
|
"Let's think step-by-step. This page has a post titled '[I ate] Maple Pecan Croissant', which is the post mentioned in the objective. In order to find the top comment, I will navigate into the comments section of the post. In summary, the next action I will perform is ```click [11]```",
|
||||||
|
"agent/prompts/som_examples/som_example2.png"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"OBSERVATION:\n[] [StaticText] [What are you looking for today?]\n[5] [INPUT] []\n[6] [SELECT] [Select a category]\n[7] [BUTTON] [Search]\n[] [StaticText] [Latest Listings]\n[] [StaticText] [Atlas Powered Audio System w/ Tripod\t150.00 $\tMusic instruments\tBorough of Red Lion (Pennsylvania)\t2023/11/16]\n[8] [IMG] [Atlas Powered Audio System w/ Tripod]\n[9] [A] [Atlas Powered Audio System w/ Tripod]\n[] [StaticText] [150.00 $]\n[] [StaticText] [Neptune Gaming Console\t350.00 $\tVideo gaming\tPennwyn (Pennsylvania)\t2023/11/16]\n[10] [IMG] [Neptune Gaming Console]\n[11] [A] [Neptune Gaming Console]\n[] [StaticText] [350.00 $]\nURL: http://classifieds.com\nOBJECTIVE: Help me find the cheapest dark colored guitar.\nPREVIOUS ACTION: None",
|
||||||
|
"Let's think step-by-step. The objective is to find the cheapest dark colored guitar on the site. The site has a search box whose ID is [5]. I can search for guitars by entering \"guitar\". I can submit this by pressing the Enter afterwards. In summary, the next action I will perform is ```type [5] [guitar] [1]```",
|
||||||
|
"agent/prompts/som_examples/som_example3.png"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"template": "OBSERVATION: {observation}\nURL: {url}\nOBJECTIVE: {objective}\nPREVIOUS ACTION: {previous_action}",
|
||||||
|
"meta_data": {
|
||||||
|
"observation": "image_som",
|
||||||
|
"action_type": "som",
|
||||||
|
"keywords": [
|
||||||
|
"url",
|
||||||
|
"objective",
|
||||||
|
"observation",
|
||||||
|
"previous_action"
|
||||||
|
],
|
||||||
|
"prompt_constructor": "MultimodalCoTPromptConstructor",
|
||||||
|
"answer_phrase": "In summary, the next action I will perform is",
|
||||||
|
"action_splitter": "```"
|
||||||
|
}
|
||||||
|
}
|
After Width: | Height: | Size: 253 KiB |
After Width: | Height: | Size: 210 KiB |
After Width: | Height: | Size: 312 KiB |
82
VAB-WebArena-Lite/agent/prompts/raw/p_cot_id_actree_2s.py
Normal file
|
@ -0,0 +1,82 @@
|
||||||
|
prompt = {
|
||||||
|
"intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.
|
||||||
|
|
||||||
|
Here's the information you'll have:
|
||||||
|
The user's objective: This is the task you're trying to complete.
|
||||||
|
The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.
|
||||||
|
The current web page's URL: This is the page you're currently navigating.
|
||||||
|
The open tabs: These are the tabs you have open.
|
||||||
|
The previous action: This is the action you just performed. It may be helpful to track your progress.
|
||||||
|
|
||||||
|
The actions you can perform fall into several categories:
|
||||||
|
|
||||||
|
Page Operation Actions:
|
||||||
|
`click [id]`: This action clicks on an element with a specific id on the webpage.
|
||||||
|
`type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0.
|
||||||
|
`hover [id]`: Hover over an element with id.
|
||||||
|
`press [key_comb]`: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).
|
||||||
|
`scroll [direction=down|up]`: Scroll the page up or down.
|
||||||
|
|
||||||
|
Tab Management Actions:
|
||||||
|
`new_tab`: Open a new, empty browser tab.
|
||||||
|
`tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index.
|
||||||
|
`close_tab`: Close the currently active tab.
|
||||||
|
|
||||||
|
URL Navigation Actions:
|
||||||
|
`goto [url]`: Navigate to a specific URL.
|
||||||
|
`go_back`: Navigate to the previously viewed page.
|
||||||
|
`go_forward`: Navigate to the next page (if a previous 'go_back' action was performed).
|
||||||
|
|
||||||
|
Completion Action:
|
||||||
|
`stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket. If you believe the task is impossible to complete, provide the answer as "N/A" in the bracket.
|
||||||
|
|
||||||
|
Homepage:
|
||||||
|
If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.
|
||||||
|
http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.
|
||||||
|
|
||||||
|
To be successful, it is very important to follow the following rules:
|
||||||
|
1. You should only issue an action that is valid given the current observation
|
||||||
|
2. You should only issue one action at a time.
|
||||||
|
3. You should follow the examples to reason step by step and then issue the next action.
|
||||||
|
4. Generate the action in the correct format. Start with a "In summary, the next action I will perform is" phrase, followed by action inside ``````. For example, "In summary, the next action I will perform is ```click [1234]```".
|
||||||
|
5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.""",
|
||||||
|
"examples": [
|
||||||
|
(
|
||||||
|
"""OBSERVATION:
|
||||||
|
[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'
|
||||||
|
[1749] StaticText '$279.49'
|
||||||
|
[1757] button 'Add to Cart'
|
||||||
|
[1760] button 'Add to Wish List'
|
||||||
|
[1761] button 'Add to Compare'
|
||||||
|
URL: http://onestopmarket.com/office-products/office-electronics.html
|
||||||
|
OBJECTIVE: What is the price of HP Inkjet Fax Machine
|
||||||
|
PREVIOUS ACTION: None""",
|
||||||
|
"Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""OBSERVATION:
|
||||||
|
[164] textbox 'Search' focused: True required: False
|
||||||
|
[171] button 'Go'
|
||||||
|
[174] link 'Find directions between two points'
|
||||||
|
[212] heading 'Search Results'
|
||||||
|
[216] button 'Close'
|
||||||
|
URL: http://openstreetmap.org
|
||||||
|
OBJECTIVE: Show me the restaurants near CMU
|
||||||
|
PREVIOUS ACTION: None""",
|
||||||
|
"Let's think step-by-step. This page has a search box whose ID is [164]. According to the nominatim rule of openstreetmap, I can search for the restaurants near a location by \"restaurants near\". I can submit my typing by pressing the Enter afterwards. In summary, the next action I will perform is ```type [164] [restaurants near CMU] [1]```",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
"template": """OBSERVATION:
|
||||||
|
{observation}
|
||||||
|
URL: {url}
|
||||||
|
OBJECTIVE: {objective}
|
||||||
|
PREVIOUS ACTION: {previous_action}""",
|
||||||
|
"meta_data": {
|
||||||
|
"observation": "accessibility_tree",
|
||||||
|
"action_type": "id_accessibility_tree",
|
||||||
|
"keywords": ["url", "objective", "observation", "previous_action"],
|
||||||
|
"prompt_constructor": "CoTPromptConstructor",
|
||||||
|
"answer_phrase": "In summary, the next action I will perform is",
|
||||||
|
"action_splitter": "```"
|
||||||
|
},
|
||||||
|
}
|
|
@ -0,0 +1,82 @@
|
||||||
|
prompt = {
|
||||||
|
"intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.
|
||||||
|
|
||||||
|
Here's the information you'll have:
|
||||||
|
The user's objective: This is the task you're trying to complete.
|
||||||
|
The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.
|
||||||
|
The current web page's URL: This is the page you're currently navigating.
|
||||||
|
The open tabs: These are the tabs you have open.
|
||||||
|
The previous action: This is the action you just performed. It may be helpful to track your progress.
|
||||||
|
|
||||||
|
The actions you can perform fall into several categories:
|
||||||
|
|
||||||
|
Page Operation Actions:
|
||||||
|
`click [id]`: This action clicks on an element with a specific id on the webpage.
|
||||||
|
`type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0.
|
||||||
|
`hover [id]`: Hover over an element with id.
|
||||||
|
`press [key_comb]`: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).
|
||||||
|
`scroll [direction=down|up]`: Scroll the page up or down.
|
||||||
|
|
||||||
|
Tab Management Actions:
|
||||||
|
`new_tab`: Open a new, empty browser tab.
|
||||||
|
`tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index.
|
||||||
|
`close_tab`: Close the currently active tab.
|
||||||
|
|
||||||
|
URL Navigation Actions:
|
||||||
|
`goto [url]`: Navigate to a specific URL.
|
||||||
|
`go_back`: Navigate to the previously viewed page.
|
||||||
|
`go_forward`: Navigate to the next page (if a previous 'go_back' action was performed).
|
||||||
|
|
||||||
|
Completion Action:
|
||||||
|
`stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.
|
||||||
|
|
||||||
|
Homepage:
|
||||||
|
If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.
|
||||||
|
http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.
|
||||||
|
|
||||||
|
To be successful, it is very important to follow the following rules:
|
||||||
|
1. You should only issue an action that is valid given the current observation
|
||||||
|
2. You should only issue one action at a time.
|
||||||
|
3. You should follow the examples to reason step by step and then issue the next action.
|
||||||
|
4. Generate the action in the correct format. Start with a "In summary, the next action I will perform is" phrase, followed by action inside ``````. For example, "In summary, the next action I will perform is ```click [1234]```".
|
||||||
|
5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.""",
|
||||||
|
"examples": [
|
||||||
|
(
|
||||||
|
"""OBSERVATION:
|
||||||
|
[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'
|
||||||
|
[1749] StaticText '$279.49'
|
||||||
|
[1757] button 'Add to Cart'
|
||||||
|
[1760] button 'Add to Wish List'
|
||||||
|
[1761] button 'Add to Compare'
|
||||||
|
URL: http://onestopmarket.com/office-products/office-electronics.html
|
||||||
|
OBJECTIVE: What is the price of HP Inkjet Fax Machine
|
||||||
|
PREVIOUS ACTION: None""",
|
||||||
|
"Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""OBSERVATION:
|
||||||
|
[164] textbox 'Search' focused: True required: False
|
||||||
|
[171] button 'Go'
|
||||||
|
[174] link 'Find directions between two points'
|
||||||
|
[212] heading 'Search Results'
|
||||||
|
[216] button 'Close'
|
||||||
|
URL: http://openstreetmap.org
|
||||||
|
OBJECTIVE: Show me the restaurants near CMU
|
||||||
|
PREVIOUS ACTION: None""",
|
||||||
|
"Let's think step-by-step. This page has a search box whose ID is [164]. According to the nominatim rule of openstreetmap, I can search for the restaurants near a location by \"restaurants near\". I can submit my typing by pressing the Enter afterwards. In summary, the next action I will perform is ```type [164] [restaurants near CMU] [1]```",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
"template": """OBSERVATION:
|
||||||
|
{observation}
|
||||||
|
URL: {url}
|
||||||
|
OBJECTIVE: {objective}
|
||||||
|
PREVIOUS ACTION: {previous_action}""",
|
||||||
|
"meta_data": {
|
||||||
|
"observation": "accessibility_tree",
|
||||||
|
"action_type": "id_accessibility_tree",
|
||||||
|
"keywords": ["url", "objective", "observation", "previous_action"],
|
||||||
|
"prompt_constructor": "CoTPromptConstructor",
|
||||||
|
"answer_phrase": "In summary, the next action I will perform is",
|
||||||
|
"action_splitter": "```"
|
||||||
|
},
|
||||||
|
}
|
115
VAB-WebArena-Lite/agent/prompts/raw/p_cot_id_actree_3s.py
Normal file
|
@ -0,0 +1,115 @@
|
||||||
|
prompt = {
|
||||||
|
"intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.
|
||||||
|
|
||||||
|
Here's the information you'll have:
|
||||||
|
The user's objective: This is the task you're trying to complete.
|
||||||
|
The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.
|
||||||
|
The current web page's URL: This is the page you're currently navigating.
|
||||||
|
The open tabs: These are the tabs you have open.
|
||||||
|
The previous action: This is the action you just performed. It may be helpful to track your progress.
|
||||||
|
|
||||||
|
The actions you can perform fall into several categories:
|
||||||
|
|
||||||
|
Page Operation Actions:
|
||||||
|
```click [id]```: This action clicks on an element with a specific id on the webpage.
|
||||||
|
```type [id] [content]```: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0, i.e., ```type [id] [content] [0]```.
|
||||||
|
```hover [id]```: Hover over an element with id.
|
||||||
|
```press [key_comb]```: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).
|
||||||
|
```scroll [down]``` or ```scroll [up]```: Scroll the page up or down.
|
||||||
|
|
||||||
|
Tab Management Actions:
|
||||||
|
```new_tab```: Open a new, empty browser tab.
|
||||||
|
```tab_focus [tab_index]```: Switch the browser's focus to a specific tab using its index.
|
||||||
|
```close_tab```: Close the currently active tab.
|
||||||
|
|
||||||
|
URL Navigation Actions:
|
||||||
|
```goto [url]```: Navigate to a specific URL.
|
||||||
|
```go_back```: Navigate to the previously viewed page.
|
||||||
|
```go_forward```: Navigate to the next page (if a previous 'go_back' action was performed).
|
||||||
|
|
||||||
|
Completion Action:
|
||||||
|
```stop [answer]```: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.
|
||||||
|
|
||||||
|
Homepage:
|
||||||
|
If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.
|
||||||
|
http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.
|
||||||
|
|
||||||
|
To be successful, it is very important to follow the following rules:
|
||||||
|
1. You should only issue an action that is valid given the current observation
|
||||||
|
2. You should only issue one action at a time.
|
||||||
|
3. You should follow the examples to reason step by step and then issue the next action.
|
||||||
|
4. Generate the action in the correct format. Start with a "In summary, the next action I will perform is" phrase, followed by action inside ``````. For example, "In summary, the next action I will perform is ```click [1234]```".
|
||||||
|
5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.""",
|
||||||
|
"examples": [
|
||||||
|
(
|
||||||
|
"""OBSERVATION:
|
||||||
|
[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'
|
||||||
|
[1749] StaticText '$279.49'
|
||||||
|
[1757] button 'Add to Cart'
|
||||||
|
[1760] button 'Add to Wish List'
|
||||||
|
[1761] button 'Add to Compare'
|
||||||
|
URL: http://onestopmarket.com/office-products/office-electronics.html
|
||||||
|
OBJECTIVE: What is the price of HP Inkjet Fax Machine?
|
||||||
|
PREVIOUS ACTION: None""",
|
||||||
|
"Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""OBSERVATION:
|
||||||
|
[204] heading '/f/food'
|
||||||
|
[593] heading '[homemade] Obligatory Halloween Pumpkin Loaf!'
|
||||||
|
[942] link '[homemade] Obligatory Halloween Pumpkin Loaf!'
|
||||||
|
[945] StaticText 'Submitted by '
|
||||||
|
[30] link 'kneechalice' expanded: False
|
||||||
|
[1484] StaticText 't3_yid9lu'
|
||||||
|
[949] time 'October 31, 2022 at 10:10:03 AM EDT'
|
||||||
|
[1488] StaticText '1 year ago'
|
||||||
|
[1489] link '45 comments'
|
||||||
|
[605] heading '[I ate] Maple Pecan Croissant'
|
||||||
|
[963] link '[I ate] Maple Pecan Croissant'
|
||||||
|
[966] StaticText 'Submitted by '
|
||||||
|
[37] link 'AccordingtoJP' expanded: False
|
||||||
|
[1494] StaticText 't3_y3hrpn'
|
||||||
|
[970] time 'October 13, 2022 at 10:41:09 PM EDT'
|
||||||
|
[1498] StaticText '1 year ago'
|
||||||
|
[1499] link '204 comments'
|
||||||
|
URL: http://reddit.com
|
||||||
|
OBJECTIVE: Tell me what the top comment on the croissant post says.
|
||||||
|
PREVIOUS ACTION: None""",
|
||||||
|
"Let's think step-by-step. This page has a post titled '[I ate] Maple Pecan Croissant', which is the post mentioned in the objective. In order to find the top comment, I will navigate into the comments section of the post. In summary, the next action I will perform is ```click [1499]```",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""OBSERVATION:
|
||||||
|
[42] link 'My account'
|
||||||
|
[43] link 'Logout'
|
||||||
|
[44] link 'Publish Ad'
|
||||||
|
[25] heading 'What are you looking for today?'
|
||||||
|
[143] StaticText 'Keyword'
|
||||||
|
[81] textbox 'e.g., a blue used car' required: False
|
||||||
|
[146] StaticText 'Category'
|
||||||
|
[28] heading 'Latest Listings'
|
||||||
|
[86] link 'Atlas Powered Audio System w/ Tripod'
|
||||||
|
[176] img 'Atlas Powered Audio System w/ Tripod'
|
||||||
|
[511] StaticText '150.00 $'
|
||||||
|
[88] link 'Neptune Gaming Console'
|
||||||
|
[178] img 'Neptune Gaming Console'
|
||||||
|
[515] StaticText '350.00 $'
|
||||||
|
URL: http://classifieds.com
|
||||||
|
OBJECTIVE: Help me find the cheapest dark colored guitar.
|
||||||
|
PREVIOUS ACTION: None""",
|
||||||
|
"Let's think step-by-step. The objective is to find the cheapest dark colored guitar on the site. The site has a search box whose ID is [81]. I can search for guitars by entering \"guitar\". I can submit this by pressing the Enter afterwards. In summary, the next action I will perform is ```type [81] [guitar] [1]```",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
"template": """OBSERVATION:
|
||||||
|
{observation}
|
||||||
|
URL: {url}
|
||||||
|
OBJECTIVE: {objective}
|
||||||
|
PREVIOUS ACTION: {previous_action}""",
|
||||||
|
"meta_data": {
|
||||||
|
"observation": "accessibility_tree",
|
||||||
|
"action_type": "id_accessibility_tree",
|
||||||
|
"keywords": ["url", "objective", "observation", "previous_action"],
|
||||||
|
"prompt_constructor": "CoTPromptConstructor",
|
||||||
|
"answer_phrase": "In summary, the next action I will perform is",
|
||||||
|
"action_splitter": "```"
|
||||||
|
},
|
||||||
|
}
|
|
@ -0,0 +1,118 @@
|
||||||
|
prompt = {
|
||||||
|
"intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.
|
||||||
|
|
||||||
|
Here's the information you'll have:
|
||||||
|
The user's objective: This is the task you're trying to complete.
|
||||||
|
The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.
|
||||||
|
The current web page's URL: This is the page you're currently navigating.
|
||||||
|
The open tabs: These are the tabs you have open.
|
||||||
|
The previous action: This is the action you just performed. It may be helpful to track your progress.
|
||||||
|
|
||||||
|
The actions you can perform fall into several categories:
|
||||||
|
|
||||||
|
Page Operation Actions:
|
||||||
|
```click [id]```: This action clicks on an element with a specific id on the webpage.
|
||||||
|
```type [id] [content]```: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0, i.e., ```type [id] [content] [0]```.
|
||||||
|
```hover [id]```: Hover over an element with id.
|
||||||
|
```press [key_comb]```: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).
|
||||||
|
```scroll [down]``` or ```scroll [up]```: Scroll the page up or down.
|
||||||
|
|
||||||
|
Tab Management Actions:
|
||||||
|
```new_tab```: Open a new, empty browser tab.
|
||||||
|
```tab_focus [tab_index]```: Switch the browser's focus to a specific tab using its index.
|
||||||
|
```close_tab```: Close the currently active tab.
|
||||||
|
|
||||||
|
URL Navigation Actions:
|
||||||
|
```goto [url]```: Navigate to a specific URL.
|
||||||
|
```go_back```: Navigate to the previously viewed page.
|
||||||
|
```go_forward```: Navigate to the next page (if a previous 'go_back' action was performed).
|
||||||
|
|
||||||
|
Completion Action:
|
||||||
|
```stop [answer]```: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.
|
||||||
|
|
||||||
|
Homepage:
|
||||||
|
If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.
|
||||||
|
http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.
|
||||||
|
|
||||||
|
To be successful, it is very important to follow the following rules:
|
||||||
|
1. You should only issue an action that is valid given the current observation
|
||||||
|
2. You should only issue one action at a time.
|
||||||
|
3. You should follow the examples to reason step by step and then issue the next action.
|
||||||
|
4. Generate the action in the correct format. Start with a "In summary, the next action I will perform is" phrase, followed by action inside ``````. For example, "In summary, the next action I will perform is ```click [1234]```".
|
||||||
|
5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.""",
|
||||||
|
"examples": [
|
||||||
|
(
|
||||||
|
"""OBSERVATION:
|
||||||
|
[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'
|
||||||
|
[1749] StaticText '$279.49'
|
||||||
|
[1757] button 'Add to Cart'
|
||||||
|
[1760] button 'Add to Wish List'
|
||||||
|
[1761] button 'Add to Compare'
|
||||||
|
URL: http://onestopmarket.com/office-products/office-electronics.html
|
||||||
|
OBJECTIVE: What is the price of HP Inkjet Fax Machine?
|
||||||
|
PREVIOUS ACTION: None""",
|
||||||
|
"Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```",
|
||||||
|
"agent/prompts/multimodal_examples/multimodal_example1.png"
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""OBSERVATION:
|
||||||
|
[204] heading '/f/food'
|
||||||
|
[593] heading '[homemade] Obligatory Halloween Pumpkin Loaf!'
|
||||||
|
[942] link '[homemade] Obligatory Halloween Pumpkin Loaf!'
|
||||||
|
[945] StaticText 'Submitted by '
|
||||||
|
[30] link 'kneechalice' expanded: False
|
||||||
|
[1484] StaticText 't3_yid9lu'
|
||||||
|
[949] time 'October 31, 2022 at 10:10:03 AM EDT'
|
||||||
|
[1488] StaticText '1 year ago'
|
||||||
|
[1489] link '45 comments'
|
||||||
|
[605] heading '[I ate] Maple Pecan Croissant'
|
||||||
|
[963] link '[I ate] Maple Pecan Croissant'
|
||||||
|
[966] StaticText 'Submitted by '
|
||||||
|
[37] link 'AccordingtoJP' expanded: False
|
||||||
|
[1494] StaticText 't3_y3hrpn'
|
||||||
|
[970] time 'October 13, 2022 at 10:41:09 PM EDT'
|
||||||
|
[1498] StaticText '1 year ago'
|
||||||
|
[1499] link '204 comments'
|
||||||
|
URL: http://reddit.com
|
||||||
|
OBJECTIVE: Tell me what the top comment on the croissant post says.
|
||||||
|
PREVIOUS ACTION: None""",
|
||||||
|
"Let's think step-by-step. This page has a post titled '[I ate] Maple Pecan Croissant', which is the post mentioned in the objective. In order to find the top comment, I will navigate into the comments section of the post. In summary, the next action I will perform is ```click [1499]```",
|
||||||
|
"agent/prompts/multimodal_examples/multimodal_example2.png"
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""OBSERVATION:
|
||||||
|
[42] link 'My account'
|
||||||
|
[43] link 'Logout'
|
||||||
|
[44] link 'Publish Ad'
|
||||||
|
[25] heading 'What are you looking for today?'
|
||||||
|
[143] StaticText 'Keyword'
|
||||||
|
[81] textbox 'e.g., a blue used car' required: False
|
||||||
|
[146] StaticText 'Category'
|
||||||
|
[28] heading 'Latest Listings'
|
||||||
|
[86] link 'Atlas Powered Audio System w/ Tripod'
|
||||||
|
[176] img 'Atlas Powered Audio System w/ Tripod'
|
||||||
|
[511] StaticText '150.00 $'
|
||||||
|
[88] link 'Neptune Gaming Console'
|
||||||
|
[178] img 'Neptune Gaming Console'
|
||||||
|
[515] StaticText '350.00 $'
|
||||||
|
URL: http://classifieds.com
|
||||||
|
OBJECTIVE: Help me find the cheapest dark colored guitar.
|
||||||
|
PREVIOUS ACTION: None""",
|
||||||
|
"Let's think step-by-step. The objective is to find the cheapest dark colored guitar on the site. The site has a search box whose ID is [81]. I can search for guitars by entering \"guitar\". I can submit this by pressing the Enter afterwards. In summary, the next action I will perform is ```type [81] [guitar] [1]```",
|
||||||
|
"agent/prompts/multimodal_examples/multimodal_example3.png"
|
||||||
|
),
|
||||||
|
],
|
||||||
|
"template": """OBSERVATION:
|
||||||
|
{observation}
|
||||||
|
URL: {url}
|
||||||
|
OBJECTIVE: {objective}
|
||||||
|
PREVIOUS ACTION: {previous_action}""",
|
||||||
|
"meta_data": {
|
||||||
|
"observation": "accessibility_tree",
|
||||||
|
"action_type": "id_accessibility_tree",
|
||||||
|
"keywords": ["url", "objective", "observation", "previous_action"],
|
||||||
|
"prompt_constructor": "MultimodalCoTPromptConstructor",
|
||||||
|
"answer_phrase": "In summary, the next action I will perform is",
|
||||||
|
"action_splitter": "```"
|
||||||
|
},
|
||||||
|
}
|
112
VAB-WebArena-Lite/agent/prompts/raw/p_som_cot_id_actree_3s.py
Normal file
|
@ -0,0 +1,112 @@
|
||||||
|
prompt = {
|
||||||
|
"intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.
|
||||||
|
|
||||||
|
Here's the information you'll have:
|
||||||
|
The user's objective: This is the task you're trying to complete.
|
||||||
|
The current web page screenshot: This is a screenshot of the webpage, with each interactable element assigned a unique numerical id. Each bounding box and its respective id shares the same color.
|
||||||
|
The observation, which lists the IDs of all interactable elements on the current web page with their text content if any, in the format [id] [tagType] [text content]. tagType is the type of the element, such as button, link, or textbox. text content is the text content of the element. For example, [1234] [button] ['Add to Cart'] means that there is a button with id 1234 and text content 'Add to Cart' on the current web page. [] [StaticText] [text] means that the element is of some text that is not interactable.
|
||||||
|
The current web page's URL: This is the page you're currently navigating.
|
||||||
|
The open tabs: These are the tabs you have open.
|
||||||
|
The previous action: This is the action you just performed. It may be helpful to track your progress.
|
||||||
|
|
||||||
|
The actions you can perform fall into several categories:
|
||||||
|
|
||||||
|
Page Operation Actions:
|
||||||
|
```click [id]```: This action clicks on an element with a specific id on the webpage.
|
||||||
|
```type [id] [content]```: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0, i.e., ```type [id] [content] [0]```.
|
||||||
|
```hover [id]```: Hover over an element with id.
|
||||||
|
```press [key_comb]```: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).
|
||||||
|
```scroll [down]``` or ```scroll [up]```: Scroll the page up or down.
|
||||||
|
|
||||||
|
Tab Management Actions:
|
||||||
|
```new_tab```: Open a new, empty browser tab.
|
||||||
|
```tab_focus [tab_index]```: Switch the browser's focus to a specific tab using its index.
|
||||||
|
```close_tab```: Close the currently active tab.
|
||||||
|
|
||||||
|
URL Navigation Actions:
|
||||||
|
```goto [url]```: Navigate to a specific URL.
|
||||||
|
```go_back```: Navigate to the previously viewed page.
|
||||||
|
```go_forward```: Navigate to the next page (if a previous 'go_back' action was performed).
|
||||||
|
|
||||||
|
Completion Action:
|
||||||
|
```stop [answer]```: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.
|
||||||
|
|
||||||
|
Homepage:
|
||||||
|
If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.
|
||||||
|
http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.
|
||||||
|
|
||||||
|
To be successful, it is very important to follow the following rules:
|
||||||
|
1. You should only issue an action that is valid given the current observation
|
||||||
|
2. You should only issue one action at a time.
|
||||||
|
3. You should follow the examples to reason step by step and then issue the next action.
|
||||||
|
4. Generate the action in the correct format. Start with a "In summary, the next action I will perform is" phrase, followed by action inside ``````. For example, "In summary, the next action I will perform is ```click [1234]```".
|
||||||
|
5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.""",
|
||||||
|
"examples": [
|
||||||
|
(
|
||||||
|
"""OBSERVATION:
|
||||||
|
[31] [IMG] [Image, description: hp fx-7010dn fax machine, url: http://ec2-3-13-232-171.us-east-2.compute.amazonaws.com:7770/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B08GKZ3ZKD.0.jpg]
|
||||||
|
[32] [A] [HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)]
|
||||||
|
[] [StaticText] [$279.49]
|
||||||
|
[33] [BUTTON] [Add to Cart]
|
||||||
|
[34] [A] [Add to Wish List]
|
||||||
|
[35] [A] [Add to Compare]
|
||||||
|
URL: http://onestopmarket.com/office-products/office-electronics.html
|
||||||
|
OBJECTIVE: What is the price of HP Inkjet Fax Machine?
|
||||||
|
PREVIOUS ACTION: None""",
|
||||||
|
"Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```",
|
||||||
|
"agent/prompts/som_examples/som_example1.png"
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""OBSERVATION:
|
||||||
|
[] [StaticText] [/f/food]
|
||||||
|
[] [StaticText] [[homemade] Obligatory Halloween Pumpkin Loaf! Submitted by kneechalice t3_yid9lu 1 year ago]
|
||||||
|
[9] [IMG] []
|
||||||
|
[] [StaticText] [Submitted by kneechalice t3_yid9lu 1 year ago]
|
||||||
|
[10] [A] [kneechalice]
|
||||||
|
[11] [A] [45 comments]
|
||||||
|
[] [StaticText] [[I ate] Maple Pecan Croissant Submitted by AccordingtoJP t3_y3hrpn 1 year ago]
|
||||||
|
[14] [IMG] []
|
||||||
|
[] [StaticText] [Submitted by AccordingtoJP t3_y3hrpn 1 year ago]
|
||||||
|
[15] [A] [AccordingtoJP]
|
||||||
|
[16] [A] [204 comments]
|
||||||
|
URL: http://reddit.com
|
||||||
|
OBJECTIVE: Tell me what the top comment on the croissant post says.
|
||||||
|
PREVIOUS ACTION: None""",
|
||||||
|
"Let's think step-by-step. This page has a post titled '[I ate] Maple Pecan Croissant', which is the post mentioned in the objective. In order to find the top comment, I will navigate into the comments section of the post. In summary, the next action I will perform is ```click [11]```",
|
||||||
|
"agent/prompts/som_examples/som_example2.png"
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""OBSERVATION:
|
||||||
|
[] [StaticText] [What are you looking for today?]
|
||||||
|
[5] [INPUT] []
|
||||||
|
[6] [SELECT] [Select a category]
|
||||||
|
[7] [BUTTON] [Search]
|
||||||
|
[] [StaticText] [Latest Listings]
|
||||||
|
[] [StaticText] [Atlas Powered Audio System w/ Tripod 150.00 $ Music instruments Borough of Red Lion (Pennsylvania) 2023/11/16]
|
||||||
|
[8] [IMG] [Atlas Powered Audio System w/ Tripod]
|
||||||
|
[9] [A] [Atlas Powered Audio System w/ Tripod]
|
||||||
|
[] [StaticText] [150.00 $]
|
||||||
|
[] [StaticText] [Neptune Gaming Console 350.00 $ Video gaming Pennwyn (Pennsylvania) 2023/11/16]
|
||||||
|
[10] [IMG] [Neptune Gaming Console]
|
||||||
|
[11] [A] [Neptune Gaming Console]
|
||||||
|
[] [StaticText] [350.00 $]
|
||||||
|
URL: http://classifieds.com
|
||||||
|
OBJECTIVE: Help me find the cheapest dark colored guitar.
|
||||||
|
PREVIOUS ACTION: None""",
|
||||||
|
"Let's think step-by-step. The objective is to find the cheapest dark colored guitar on the site. The site has a search box whose ID is [5]. I can search for guitars by entering \"guitar\". I can submit this by pressing the Enter afterwards. In summary, the next action I will perform is ```type [5] [guitar] [1]```",
|
||||||
|
"agent/prompts/som_examples/som_example3.png"
|
||||||
|
),
|
||||||
|
],
|
||||||
|
"template": """OBSERVATION: {observation}
|
||||||
|
URL: {url}
|
||||||
|
OBJECTIVE: {objective}
|
||||||
|
PREVIOUS ACTION: {previous_action}""",
|
||||||
|
"meta_data": {
|
||||||
|
"observation": "image_som",
|
||||||
|
"action_type": "som",
|
||||||
|
"keywords": ["url", "objective", "observation", "previous_action"],
|
||||||
|
"prompt_constructor": "MultimodalCoTPromptConstructor",
|
||||||
|
"answer_phrase": "In summary, the next action I will perform is",
|
||||||
|
"action_splitter": "```"
|
||||||
|
},
|
||||||
|
}
|
BIN
VAB-WebArena-Lite/agent/prompts/som_examples/som_example1.png
Normal file
After Width: | Height: | Size: 282 KiB |
BIN
VAB-WebArena-Lite/agent/prompts/som_examples/som_example2.png
Normal file
After Width: | Height: | Size: 222 KiB |
BIN
VAB-WebArena-Lite/agent/prompts/som_examples/som_example3.png
Normal file
After Width: | Height: | Size: 310 KiB |
26
VAB-WebArena-Lite/agent/prompts/to_json.py
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
import glob
|
||||||
|
import importlib
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
# use the current directory as the root
|
||||||
|
def run() -> None:
    """Convert the prompt modules in agent/prompts/raw to JSON files.

    Every ``agent/prompts/raw/<name>.py`` file is imported as
    ``agent.prompts.raw.<name>`` and its module-level ``prompt`` object is
    written to ``agent/prompts/jsons/<name>.json`` (indent of 2).
    Python files are easier to edit by hand than JSON.

    All paths are relative, so the current working directory is used as the
    root: it must contain the ``agent`` package and be importable.

    Files whose name starts with an underscore (e.g. ``__init__.py``) are
    skipped; they match the ``*.py`` glob but are not prompt definitions and
    importing them would fail on the missing ``prompt`` attribute.

    Raises:
        AttributeError: if a prompt module does not define ``prompt``.
        ImportError: if a prompt module cannot be imported.
    """
    out_dir = "agent/prompts/jsons"
    # The output directory is the same for every file, so create it once.
    os.makedirs(out_dir, exist_ok=True)

    # Sorted so files are processed in a stable order on every platform.
    for p_file in sorted(glob.glob("agent/prompts/raw/*.py")):
        base_name = os.path.splitext(os.path.basename(p_file))[0]
        if base_name.startswith("_"):
            # Package plumbing such as __init__.py, not a prompt definition.
            continue

        # import the file as a module
        module = importlib.import_module(f"agent.prompts.raw.{base_name}")
        prompt = module.prompt

        # save the prompt as a json file
        with open(f"{out_dir}/{base_name}.json", "w+") as f:
            json.dump(prompt, f, indent=2)

    print("Done convert python files to json")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
run()
|
76
VAB-WebArena-Lite/browser_env/__init__.py
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
from .actions import (
|
||||||
|
Action,
|
||||||
|
ActionParsingError,
|
||||||
|
ActionTypes,
|
||||||
|
action2create_function,
|
||||||
|
action2str,
|
||||||
|
create_check_action,
|
||||||
|
create_click_action,
|
||||||
|
create_focus_and_click_action,
|
||||||
|
create_focus_and_type_action,
|
||||||
|
create_go_back_action,
|
||||||
|
create_go_forward_action,
|
||||||
|
create_goto_url_action,
|
||||||
|
create_hover_action,
|
||||||
|
create_id_based_action,
|
||||||
|
create_key_press_action,
|
||||||
|
create_keyboard_type_action,
|
||||||
|
create_mouse_click_action,
|
||||||
|
create_mouse_hover_action,
|
||||||
|
create_new_tab_action,
|
||||||
|
create_none_action,
|
||||||
|
create_page_close_action,
|
||||||
|
create_page_focus_action,
|
||||||
|
create_playwright_action,
|
||||||
|
create_random_action,
|
||||||
|
create_scroll_action,
|
||||||
|
create_select_option_action,
|
||||||
|
create_stop_action,
|
||||||
|
create_type_action,
|
||||||
|
is_equivalent,
|
||||||
|
)
|
||||||
|
from .async_envs import AsyncScriptBrowserEnv
|
||||||
|
from .envs import ScriptBrowserEnv
|
||||||
|
from .processors import ObservationMetadata
|
||||||
|
from .trajectory import Trajectory
|
||||||
|
from .utils import DetachedPage, StateInfo
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"ScriptBrowserEnv",
|
||||||
|
"AsyncScriptBrowserEnv",
|
||||||
|
"DetachedPage",
|
||||||
|
"StateInfo",
|
||||||
|
"ObservationMetadata",
|
||||||
|
"Action",
|
||||||
|
"ActionTypes",
|
||||||
|
"action2str",
|
||||||
|
"create_random_action",
|
||||||
|
"create_focus_and_click_action",
|
||||||
|
"create_focus_and_type_action",
|
||||||
|
"is_equivalent",
|
||||||
|
"create_mouse_click_action",
|
||||||
|
"create_mouse_hover_action",
|
||||||
|
"create_none_action",
|
||||||
|
"create_keyboard_type_action",
|
||||||
|
"create_page_focus_action",
|
||||||
|
"create_new_tab_action",
|
||||||
|
"create_go_back_action",
|
||||||
|
"create_go_forward_action",
|
||||||
|
"create_goto_url_action",
|
||||||
|
"create_page_close_action",
|
||||||
|
"action2create_function",
|
||||||
|
"create_playwright_action",
|
||||||
|
"create_id_based_action",
|
||||||
|
"create_scroll_action",
|
||||||
|
"create_key_press_action",
|
||||||
|
"create_check_action",
|
||||||
|
"create_click_action",
|
||||||
|
"create_type_action",
|
||||||
|
"create_hover_action",
|
||||||
|
"create_select_option_action",
|
||||||
|
"create_stop_action",
|
||||||
|
"ActionParsingError",
|
||||||
|
"Trajectory",
|
||||||
|
]
|
160
VAB-WebArena-Lite/browser_env/async_envs.py
Normal file
|
@ -0,0 +1,160 @@
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import numpy.typing as npt
|
||||||
|
from beartype import beartype
|
||||||
|
from gymnasium import Env
|
||||||
|
from gymnasium.spaces import Box, Text
|
||||||
|
from playwright.async_api import Page, ViewportSize, async_playwright
|
||||||
|
|
||||||
|
from .actions import Action, aexecute_action, get_action_space
|
||||||
|
from .utils import DetachedPage, png_bytes_to_numpy
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncScriptBrowserEnv(Env[npt.NDArray[np.uint8], Action]):
|
||||||
|
"""
|
||||||
|
The goal of this environment is to produce a prototype of a browser environment.
|
||||||
|
In the end, we want to support a fully configurable browser environment with wide
|
||||||
|
range of action spaces and observation spaces, both structured and unstructured.
|
||||||
|
But in this prototype, we just support action space specified by Playwright script,
|
||||||
|
and observation space is the html content of the page.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@beartype
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
max_page_length: int = 2048,
|
||||||
|
headless: bool = True,
|
||||||
|
slow_mo: int = 0,
|
||||||
|
timeout: int = 30000,
|
||||||
|
viewport_size: ViewportSize = {"width": 1280, "height": 720},
|
||||||
|
):
|
||||||
|
self.observation_space = Box(
|
||||||
|
0,
|
||||||
|
255,
|
||||||
|
(viewport_size["height"], viewport_size["width"], 4),
|
||||||
|
np.uint8,
|
||||||
|
)
|
||||||
|
# TODO: make Space[Action] = ActionSpace
|
||||||
|
self.action_space = get_action_space() # type: ignore[assignment]
|
||||||
|
self.headless = headless
|
||||||
|
self.slow_mo = slow_mo
|
||||||
|
self.reset_finished = False
|
||||||
|
self.timeout = timeout
|
||||||
|
self.viewport_size = viewport_size
|
||||||
|
|
||||||
|
@beartype
|
||||||
|
async def setup(self, config_file: Path | None = None) -> None:
|
||||||
|
self.context_manager = async_playwright()
|
||||||
|
self.playwright = await self.context_manager.__aenter__()
|
||||||
|
self.browser = await self.playwright.chromium.launch(
|
||||||
|
headless=self.headless, slow_mo=self.slow_mo
|
||||||
|
)
|
||||||
|
if config_file:
|
||||||
|
with open(config_file, "r") as f:
|
||||||
|
instance_config = json.load(f)
|
||||||
|
else:
|
||||||
|
instance_config = {}
|
||||||
|
|
||||||
|
storage_state = instance_config.get("storage_state", None)
|
||||||
|
start_url = instance_config.get("start_url", None)
|
||||||
|
geolocation = instance_config.get("geolocation", None)
|
||||||
|
|
||||||
|
self.context = await self.browser.new_context(
|
||||||
|
viewport=self.viewport_size,
|
||||||
|
storage_state=storage_state,
|
||||||
|
geolocation=geolocation,
|
||||||
|
device_scale_factor=1,
|
||||||
|
)
|
||||||
|
self.page = await self.context.new_page()
|
||||||
|
if start_url:
|
||||||
|
await self.page.goto(start_url)
|
||||||
|
|
||||||
|
@beartype
|
||||||
|
async def areset(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
seed: int | None = None,
|
||||||
|
options: dict[str, str] | None = None,
|
||||||
|
) -> tuple[npt.NDArray[np.uint8], dict[str, object]]:
|
||||||
|
"""
|
||||||
|
Reset the environment.
|
||||||
|
:param options: options for the environment. The options are:
|
||||||
|
- storage_state: the path to the storage state file
|
||||||
|
"""
|
||||||
|
super().reset(seed=seed, options=options)
|
||||||
|
if self.reset_finished:
|
||||||
|
await self.context_manager.__aexit__()
|
||||||
|
if options is not None and "config_file" in options:
|
||||||
|
config_file = Path(options["config_file"])
|
||||||
|
if config_file.exists():
|
||||||
|
await self.setup(config_file=config_file)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Config state {config_file} does not exist.")
|
||||||
|
else:
|
||||||
|
await self.setup()
|
||||||
|
self.reset_finished = True
|
||||||
|
content = await self.page.content()
|
||||||
|
screenshot = png_bytes_to_numpy(await self.page.screenshot())
|
||||||
|
return (
|
||||||
|
screenshot,
|
||||||
|
{"page": DetachedPage(self.page.url, content)},
|
||||||
|
)
|
||||||
|
|
||||||
|
@beartype
|
||||||
|
def reset(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
seed: int | None = None,
|
||||||
|
options: dict[str, str] | None = None,
|
||||||
|
) -> tuple[npt.NDArray[np.uint8], dict[str, object]]:
|
||||||
|
return asyncio.run(self.areset(seed=seed, options=options))
|
||||||
|
|
||||||
|
async def aclose(self) -> None:
|
||||||
|
if self.reset_finished:
|
||||||
|
await self.context_manager.__aexit__()
|
||||||
|
|
||||||
|
def close(self) -> None:
|
||||||
|
asyncio.run(self.aclose())
|
||||||
|
|
||||||
|
@beartype
async def astep(
    self, action: Action
) -> tuple[npt.NDArray[np.uint8], float, bool, bool, dict[str, object]]:
    """
    Execute one action and return the resulting observation.

    :param action: the action handed to ``aexecute_action``.
    :return: ``(screenshot, reward, terminated, truncated, info)``.
        ``reward`` is ``1.0`` if the action ran without raising and ``0.0``
        otherwise; ``terminated`` and ``truncated`` are always ``False``;
        ``info`` holds the detached page and the action's error text
        (empty string on success).
    :raises RuntimeError: if called before a reset has completed.
    """
    if not self.reset_finished:
        raise RuntimeError("Call reset first before calling step.")

    success = False
    fail_error = ""
    try:
        self.page = await aexecute_action(action, self.page, self.context)
        success = True
    except Exception as e:
        # A failing action is reported through the info dict, not raised.
        fail_error = str(e)

    try:
        content = await self.page.content()
        screenshot = png_bytes_to_numpy(await self.page.screenshot())
    except Exception:
        # Only ordinary errors trigger the retry. A bare ``except`` would
        # also swallow asyncio.CancelledError / KeyboardInterrupt and keep
        # a cancelled task running.
        await self.page.wait_for_load_state("load")
        content = await self.page.content()
        screenshot = png_bytes_to_numpy(await self.page.screenshot())

    return (
        screenshot,
        float(success),
        False,
        False,
        {
            "page": DetachedPage(self.page.url, content),
            "fail_error": fail_error,
        },
    )
|
||||||
|
|
||||||
|
@beartype
def step(
    self, action: Action
) -> tuple[npt.NDArray[np.uint8], float, bool, bool, dict[str, object]]:
    """Synchronous wrapper: run ``astep`` on a fresh event loop in asyncio debug mode."""
    step_coro = self.astep(action)
    return asyncio.run(step_coro, debug=True)
|
182
VAB-WebArena-Lite/browser_env/auto_login.py
Normal file
|
@ -0,0 +1,182 @@
|
||||||
|
"""Script to automatically login each website"""
|
||||||
|
import argparse
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from itertools import combinations
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
from browser_env.env_config import ACCOUNTS
|
||||||
|
|
||||||
|
DATASET = os.environ["DATASET"]
|
||||||
|
if DATASET == "webarena":
|
||||||
|
from browser_env.env_config import (
|
||||||
|
GITLAB,
|
||||||
|
REDDIT,
|
||||||
|
SHOPPING,
|
||||||
|
SHOPPING_ADMIN,
|
||||||
|
)
|
||||||
|
SITES = ["gitlab", "shopping", "shopping_admin", "reddit"]
|
||||||
|
URLS = [
|
||||||
|
f"{GITLAB}/-/profile",
|
||||||
|
f"{SHOPPING}/wishlist/",
|
||||||
|
f"{SHOPPING_ADMIN}/dashboard",
|
||||||
|
f"{REDDIT}/user/{ACCOUNTS['reddit']['username']}/account",
|
||||||
|
]
|
||||||
|
EXACT_MATCH = [True, True, True, True]
|
||||||
|
KEYWORDS = ["", "", "Dashboard", "Delete"]
|
||||||
|
|
||||||
|
elif DATASET == "visualwebarena":
|
||||||
|
from browser_env.env_config import (
|
||||||
|
CLASSIFIEDS,
|
||||||
|
REDDIT,
|
||||||
|
SHOPPING,
|
||||||
|
)
|
||||||
|
SITES = ["shopping", "reddit", "classifieds"]
|
||||||
|
URLS = [
|
||||||
|
f"{SHOPPING}/wishlist/",
|
||||||
|
f"{REDDIT}/user/{ACCOUNTS['reddit']['username']}/account",
|
||||||
|
f"{CLASSIFIEDS}/index.php?page=user&action=items",
|
||||||
|
]
|
||||||
|
EXACT_MATCH = [True, True, True]
|
||||||
|
KEYWORDS = ["", "Delete", "My listings"]
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Dataset not implemented: {DATASET}")
|
||||||
|
|
||||||
|
HEADLESS = True
|
||||||
|
SLOW_MO = 0
|
||||||
|
|
||||||
|
assert len(SITES) == len(URLS) == len(EXACT_MATCH) == len(KEYWORDS)
|
||||||
|
|
||||||
|
def is_expired(
    storage_state: Path, url: str, keyword: str, url_exact: bool = True
) -> bool:
    """Test whether the cookie is expired.

    Opens ``url`` in a headless Chromium context restored from
    ``storage_state`` and decides whether the login is still valid.

    :param storage_state: path to the saved storage-state JSON file.
    :param url: page to load.
    :param keyword: if non-empty, the session counts as valid only when this
        text appears in the loaded page's HTML.
    :param url_exact: used only when ``keyword`` is empty. If true, the final
        URL must equal ``url``; otherwise ``url`` must be a substring of it.
    :return: ``True`` if the state file is missing or the check fails.
    """
    if not storage_state.exists():
        return True

    # ``with`` guarantees Playwright is stopped even when launch/goto raises;
    # the manual __enter__/__exit__ pair skipped cleanup on any exception.
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True, slow_mo=SLOW_MO)
        context = browser.new_context(storage_state=storage_state)
        page = context.new_page()
        page.goto(url)
        time.sleep(1)  # give any redirect a moment to land
        d_url = page.url
        content = page.content()

    if keyword:
        return keyword not in content
    if url_exact:
        return d_url != url
    return url not in d_url
|
||||||
|
|
||||||
|
|
||||||
|
def renew_comb(comb: list[str], auth_folder: str = "./.auth") -> None:
    """Log into every site named in ``comb`` and save the browser state.

    All logins happen in one browser context, so the saved file carries the
    cookies of every requested site. The state is written to
    ``{auth_folder}/{sites joined by '.'}_state.json``.

    :param comb: site names; recognised values are ``shopping``, ``reddit``,
        ``classifieds``, ``shopping_admin`` and ``gitlab``.
    :param auth_folder: directory that receives the state file.
    """
    # ``with`` guarantees Playwright is stopped even if a login step raises;
    # the manual __enter__/__exit__ pair leaked the browser on any exception.
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=HEADLESS)
        context = browser.new_context()
        page = context.new_page()

        if "shopping" in comb:
            username = ACCOUNTS["shopping"]["username"]
            password = ACCOUNTS["shopping"]["password"]
            page.goto(f"{SHOPPING}/customer/account/login/")
            page.get_by_label("Email", exact=True).fill(username)
            page.get_by_label("Password", exact=True).fill(password)
            page.get_by_role("button", name="Sign In").click()

        if "reddit" in comb:
            username = ACCOUNTS["reddit"]["username"]
            password = ACCOUNTS["reddit"]["password"]
            page.goto(f"{REDDIT}/login")
            page.get_by_label("Username").fill(username)
            page.get_by_label("Password").fill(password)
            page.get_by_role("button", name="Log in").click()

        if "classifieds" in comb:
            username = ACCOUNTS["classifieds"]["username"]
            password = ACCOUNTS["classifieds"]["password"]
            page.goto(f"{CLASSIFIEDS}/index.php?page=login")
            page.locator("#email").fill(username)
            page.locator("#password").fill(password)
            page.get_by_role("button", name="Log in").click()

        if "shopping_admin" in comb:
            username = ACCOUNTS["shopping_admin"]["username"]
            password = ACCOUNTS["shopping_admin"]["password"]
            page.goto(f"{SHOPPING_ADMIN}")
            page.get_by_placeholder("user name").fill(username)
            page.get_by_placeholder("password").fill(password)
            page.get_by_role("button", name="Sign in").click()

        if "gitlab" in comb:
            username = ACCOUNTS["gitlab"]["username"]
            password = ACCOUNTS["gitlab"]["password"]
            page.goto(f"{GITLAB}/users/sign_in")
            page.get_by_test_id("username-field").click()
            page.get_by_test_id("username-field").fill(username)
            page.get_by_test_id("username-field").press("Tab")
            page.get_by_test_id("password-field").fill(password)
            page.get_by_test_id("sign-in-button").click()

        context.storage_state(path=f"{auth_folder}/{'.'.join(comb)}_state.json")
|
||||||
|
|
||||||
|
|
||||||
|
def get_site_comb_from_filepath(file_path: str) -> list[str]:
    """Recover the site names encoded in a storage-state file name.

    Everything after the last ``_`` of the base name is dropped, and the rest
    is split on ``.``, e.g. ``gitlab.reddit_state.json`` gives
    ``["gitlab", "reddit"]``.
    """
    file_name = os.path.basename(file_path)
    site_part, *_ = file_name.rsplit("_", 1)
    return site_part.split(".")
|
||||||
|
|
||||||
|
|
||||||
|
def main(auth_folder: str = "./.auth") -> None:
    """Renew the login state of every site and site pair, then verify it.

    First pass: submit ``renew_comb`` for every 2-combination of ``SITES``
    (except pairs mixing reddit with a shopping site) and for every single
    site. Second pass: for each ``*.json`` file in ``auth_folder``, run
    ``is_expired`` once per site encoded in the file name.

    :param auth_folder: directory holding the storage-state files.
    :raises AssertionError: if any checked cookie file is reported expired.
    """
    pairs = list(combinations(SITES, 2))

    with ThreadPoolExecutor(max_workers=8) as executor:
        for pair in pairs:
            # Auth doesn't work on this pair as they share the same cookie
            if "reddit" in pair and (
                "shopping" in pair or "shopping_admin" in pair
            ):
                continue
            executor.submit(
                renew_comb, list(sorted(pair)), auth_folder=auth_folder
            )

        for site in SITES:
            executor.submit(renew_comb, [site], auth_folder=auth_folder)

    # parallel checking if the cookies are expired
    # Each future is stored with the file it checks. A multi-site file yields
    # several futures, so a future's position in the flat list is NOT an
    # index into cookie_files.
    checks = []
    cookie_files = list(glob.glob(f"{auth_folder}/*.json"))
    with ThreadPoolExecutor(max_workers=8) as executor:
        for c_file in cookie_files:
            for cur_site in get_site_comb_from_filepath(c_file):
                site_idx = SITES.index(cur_site)
                future = executor.submit(
                    is_expired,
                    Path(c_file),
                    URLS[site_idx],
                    KEYWORDS[site_idx],
                    EXACT_MATCH[site_idx],
                )
                checks.append((c_file, future))

    for c_file, future in checks:
        assert not future.result(), f"Cookie {c_file} expired."
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # CLI: with no --site_list, renew and verify everything; otherwise renew
    # only the given combination of sites.
    parser = argparse.ArgumentParser()
    parser.add_argument("--site_list", nargs="+", default=[])
    parser.add_argument("--auth_folder", type=str, default="./.auth")
    args = parser.parse_args()
    if not args.site_list:
        # Forward --auth_folder here too; it used to be silently ignored.
        main(auth_folder=args.auth_folder)
    else:
        renew_comb(args.site_list, auth_folder=args.auth_folder)
|
324
VAB-WebArena-Lite/browser_env/constants.py
Normal file
|
@ -0,0 +1,324 @@
|
||||||
|
import re
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
ROLES = (
|
||||||
|
"alert",
|
||||||
|
"alertdialog",
|
||||||
|
"application",
|
||||||
|
"article",
|
||||||
|
"banner",
|
||||||
|
"blockquote",
|
||||||
|
"button",
|
||||||
|
"caption",
|
||||||
|
"cell",
|
||||||
|
"checkbox",
|
||||||
|
"code",
|
||||||
|
"columnheader",
|
||||||
|
"combobox",
|
||||||
|
"complementary",
|
||||||
|
"contentinfo",
|
||||||
|
"definition",
|
||||||
|
"deletion",
|
||||||
|
"dialog",
|
||||||
|
"directory",
|
||||||
|
"document",
|
||||||
|
"emphasis",
|
||||||
|
"feed",
|
||||||
|
"figure",
|
||||||
|
"form",
|
||||||
|
"generic",
|
||||||
|
"grid",
|
||||||
|
"gridcell",
|
||||||
|
"group",
|
||||||
|
"heading",
|
||||||
|
"img",
|
||||||
|
"insertion",
|
||||||
|
"link",
|
||||||
|
"list",
|
||||||
|
"listbox",
|
||||||
|
"listitem",
|
||||||
|
"log",
|
||||||
|
"main",
|
||||||
|
"marquee",
|
||||||
|
"math",
|
||||||
|
"meter",
|
||||||
|
"menu",
|
||||||
|
"menubar",
|
||||||
|
"menuitem",
|
||||||
|
"menuitemcheckbox",
|
||||||
|
"menuitemradio",
|
||||||
|
"navigation",
|
||||||
|
"none",
|
||||||
|
"note",
|
||||||
|
"option",
|
||||||
|
"paragraph",
|
||||||
|
"presentation",
|
||||||
|
"progressbar",
|
||||||
|
"radio",
|
||||||
|
"radiogroup",
|
||||||
|
"region",
|
||||||
|
"row",
|
||||||
|
"rowgroup",
|
||||||
|
"rowheader",
|
||||||
|
"scrollbar",
|
||||||
|
"search",
|
||||||
|
"searchbox",
|
||||||
|
"separator",
|
||||||
|
"slider",
|
||||||
|
"spinbutton",
|
||||||
|
"status",
|
||||||
|
"strong",
|
||||||
|
"subscript",
|
||||||
|
"superscript",
|
||||||
|
"switch",
|
||||||
|
"tab",
|
||||||
|
"table",
|
||||||
|
"tablist",
|
||||||
|
"tabpanel",
|
||||||
|
"term",
|
||||||
|
"textbox",
|
||||||
|
"time",
|
||||||
|
"timer",
|
||||||
|
"toolbar",
|
||||||
|
"tooltip",
|
||||||
|
"tree",
|
||||||
|
"treegrid",
|
||||||
|
"treeitem",
|
||||||
|
)
|
||||||
|
|
||||||
|
SPECIAL_LOCATORS = (
|
||||||
|
"alt_text",
|
||||||
|
"label",
|
||||||
|
"placeholder",
|
||||||
|
)
|
||||||
|
|
||||||
|
ASCII_CHARSET = "".join(chr(x) for x in range(32, 128))
|
||||||
|
FREQ_UNICODE_CHARSET = "".join(chr(x) for x in range(129, 130000))
|
||||||
|
UTTERANCE_MAX_LENGTH = 8192
|
||||||
|
ATTRIBUTE_MAX_LENGTH = 256
|
||||||
|
TEXT_MAX_LENGTH = 256
|
||||||
|
TYPING_MAX_LENGTH = 64
|
||||||
|
URL_MAX_LENGTH = 256
|
||||||
|
MAX_ELEMENT_INDEX_IN_VIEWPORT = 10
|
||||||
|
MAX_ELEMENT_ID = 1000
|
||||||
|
MAX_ANSWER_LENGTH = 512
|
||||||
|
|
||||||
|
MIN_REF = -1000000
|
||||||
|
MAX_REF = 1000000
|
||||||
|
|
||||||
|
WINDOW_WIDTH = 500
|
||||||
|
WINDOW_HEIGHT = 240
|
||||||
|
TASK_WIDTH = 160
|
||||||
|
TASK_HEIGHT = 210
|
||||||
|
|
||||||
|
FLIGHT_WINDOW_WIDTH = 600
|
||||||
|
FLIGHT_WINDOW_HEIGHT = 700
|
||||||
|
FLIGHT_TASK_WIDTH = 375
|
||||||
|
FLIGHT_TASK_HEIGHT = 667
|
||||||
|
MAX_PAGE_NUMBER = 10
|
||||||
|
|
||||||
|
SPECIAL_KEYS = (
|
||||||
|
"Enter",
|
||||||
|
"Tab",
|
||||||
|
"Control",
|
||||||
|
"Shift",
|
||||||
|
"Meta",
|
||||||
|
"Backspace",
|
||||||
|
"Delete",
|
||||||
|
"Escape",
|
||||||
|
"ArrowUp",
|
||||||
|
"ArrowDown",
|
||||||
|
"ArrowLeft",
|
||||||
|
"ArrowRight",
|
||||||
|
"PageDown",
|
||||||
|
"PageUp",
|
||||||
|
"Meta+a",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Maps a lower-case key name to its canonical capitalisation (presumably the
# spelling Playwright's keyboard API expects -- confirm against the caller).
# "arrowleft" was missing although the other three arrow keys are mapped and
# "ArrowLeft" is listed in SPECIAL_KEYS.
SPECIAL_KEY_MAPPINGS = {
    "backquote": "Backquote",
    "minus": "Minus",
    "equal": "Equal",
    "backslash": "Backslash",
    "backspace": "Backspace",
    "meta": "Meta",
    "tab": "Tab",
    "delete": "Delete",
    "escape": "Escape",
    "arrowdown": "ArrowDown",
    "end": "End",
    "enter": "Enter",
    "home": "Home",
    "insert": "Insert",
    "pagedown": "PageDown",
    "pageup": "PageUp",
    "arrowleft": "ArrowLeft",
    "arrowright": "ArrowRight",
    "arrowup": "ArrowUp",
    "f1": "F1",
    "f2": "F2",
    "f3": "F3",
    "f4": "F4",
    "f5": "F5",
    "f6": "F6",
    "f7": "F7",
    "f8": "F8",
    "f9": "F9",
    "f10": "F10",
    "f11": "F11",
    "f12": "F12",
}
|
||||||
|
|
||||||
|
RolesType = Literal[
|
||||||
|
"alert",
|
||||||
|
"alertdialog",
|
||||||
|
"application",
|
||||||
|
"article",
|
||||||
|
"banner",
|
||||||
|
"blockquote",
|
||||||
|
"button",
|
||||||
|
"caption",
|
||||||
|
"cell",
|
||||||
|
"checkbox",
|
||||||
|
"code",
|
||||||
|
"columnheader",
|
||||||
|
"combobox",
|
||||||
|
"complementary",
|
||||||
|
"contentinfo",
|
||||||
|
"definition",
|
||||||
|
"deletion",
|
||||||
|
"dialog",
|
||||||
|
"directory",
|
||||||
|
"document",
|
||||||
|
"emphasis",
|
||||||
|
"feed",
|
||||||
|
"figure",
|
||||||
|
"form",
|
||||||
|
"generic",
|
||||||
|
"grid",
|
||||||
|
"gridcell",
|
||||||
|
"group",
|
||||||
|
"heading",
|
||||||
|
"img",
|
||||||
|
"insertion",
|
||||||
|
"link",
|
||||||
|
"list",
|
||||||
|
"listbox",
|
||||||
|
"listitem",
|
||||||
|
"log",
|
||||||
|
"main",
|
||||||
|
"marquee",
|
||||||
|
"math",
|
||||||
|
"meter",
|
||||||
|
"menu",
|
||||||
|
"menubar",
|
||||||
|
"menuitem",
|
||||||
|
"menuitemcheckbox",
|
||||||
|
"menuitemradio",
|
||||||
|
"navigation",
|
||||||
|
"none",
|
||||||
|
"note",
|
||||||
|
"option",
|
||||||
|
"paragraph",
|
||||||
|
"presentation",
|
||||||
|
"progressbar",
|
||||||
|
"radio",
|
||||||
|
"radiogroup",
|
||||||
|
"region",
|
||||||
|
"row",
|
||||||
|
"rowgroup",
|
||||||
|
"rowheader",
|
||||||
|
"scrollbar",
|
||||||
|
"search",
|
||||||
|
"searchbox",
|
||||||
|
"separator",
|
||||||
|
"slider",
|
||||||
|
"spinbutton",
|
||||||
|
"status",
|
||||||
|
"strong",
|
||||||
|
"subscript",
|
||||||
|
"superscript",
|
||||||
|
"switch",
|
||||||
|
"tab",
|
||||||
|
"table",
|
||||||
|
"tablist",
|
||||||
|
"tabpanel",
|
||||||
|
"term",
|
||||||
|
"textbox",
|
||||||
|
"time",
|
||||||
|
"timer",
|
||||||
|
"toolbar",
|
||||||
|
"tooltip",
|
||||||
|
"tree",
|
||||||
|
"treegrid",
|
||||||
|
"treeitem",
|
||||||
|
"alt_text",
|
||||||
|
"label",
|
||||||
|
"placeholder",
|
||||||
|
]
|
||||||
|
|
||||||
|
MAX_VANILLA_STR_LENGTH = 1000
|
||||||
|
|
||||||
|
PLAYWRIGHT_LOCATORS = (
|
||||||
|
"get_by_role",
|
||||||
|
"get_by_text",
|
||||||
|
"get_by_label",
|
||||||
|
"get_by_placeholder",
|
||||||
|
"get_by_alt_text",
|
||||||
|
"get_by_title",
|
||||||
|
"get_by_test_id",
|
||||||
|
"filter",
|
||||||
|
"frame_locator",
|
||||||
|
"locator",
|
||||||
|
)
|
||||||
|
|
||||||
|
PLAYWRIGHT_ACTIONS = (
|
||||||
|
"fill",
|
||||||
|
"check",
|
||||||
|
"select_option",
|
||||||
|
"click",
|
||||||
|
"hover",
|
||||||
|
"dclick",
|
||||||
|
"type",
|
||||||
|
"focus",
|
||||||
|
"goto",
|
||||||
|
"press",
|
||||||
|
"scroll",
|
||||||
|
)
|
||||||
|
|
||||||
|
IGNORED_ACTREE_PROPERTIES = (
|
||||||
|
"focusable",
|
||||||
|
"editable",
|
||||||
|
"readonly",
|
||||||
|
"level",
|
||||||
|
"settable",
|
||||||
|
"multiline",
|
||||||
|
"invalid",
|
||||||
|
)
|
||||||
|
|
||||||
|
INJECTED_ATTR_NAME = "aria-roledescription"
BID_ATTR = "bid"  # the attribute name for extra meta data

# Capture-group fragments for the "_"-separated payload.
BID_EXPR = r"([-0-9]+)"
FLOAT_EXPR = r"([+-]?(?:[0-9]*[.])?[0-9]+)"
BOOL_EXPR = r"([01])"

# Payload layout: one bid, six floats, one 0/1 flag, then free-form trailing
# text, all joined by "_".
DATA_REGEXP = re.compile(
    r"_".join([BID_EXPR, *([FLOAT_EXPR] * 6), BOOL_EXPR, r"(.*)"])
)
|
||||||
|
|
||||||
|
IN_VIEWPORT_RATIO_THRESHOLD = 0.6
|
97
VAB-WebArena-Lite/browser_env/env_config.py
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
# websites domain
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Selects which benchmark's site configuration is loaded below. Read with
# .get() so an unset variable reaches the explanatory error instead of a bare
# KeyError.
DATASET = os.environ.get("DATASET", "")
if DATASET not in ["webarena", "visualwebarena"]:
    # Only list the values this module actually accepts.
    raise ValueError(
        "Please set the DATASET environment variable, the possible options "
        f"are `webarena` and `visualwebarena` (current value: {DATASET!r})"
    )
|
||||||
|
|
||||||
|
# WebArena
|
||||||
|
# Per-dataset site configuration. Each branch reads the site base URLs from
# the environment, asserts that the required ones are non-empty, and builds
# URL_MAPPINGS from each configured base URL to a canonical public URL.
if DATASET == "webarena":
    REDDIT = os.environ.get("REDDIT", "")
    SHOPPING = os.environ.get("SHOPPING", "")
    SHOPPING_ADMIN = os.environ.get("SHOPPING_ADMIN", "")
    GITLAB = os.environ.get("GITLAB", "")
    WIKIPEDIA = os.environ.get("WIKIPEDIA", "")
    MAP = os.environ.get("MAP", "")
    HOMEPAGE = os.environ.get("HOMEPAGE", "")
    assert (
        REDDIT
        and SHOPPING
        and SHOPPING_ADMIN
        and GITLAB
        and WIKIPEDIA
        and MAP
        and HOMEPAGE
    ), (
        f"Please setup the URLs to each site. Current: \n"
        + f"Reddit: {REDDIT}\n"
        + f"Shopping: {SHOPPING}\n"
        + f"Shopping Admin: {SHOPPING_ADMIN}\n"
        + f"Gitlab: {GITLAB}\n"
        + f"Wikipedia: {WIKIPEDIA}\n"
        + f"Map: {MAP}\n"
        + f"Homepage: {HOMEPAGE}\n"
    )

    URL_MAPPINGS = {
        REDDIT: "http://reddit.com",
        SHOPPING: "http://onestopmarket.com",
        SHOPPING_ADMIN: "http://luma.com/admin",
        GITLAB: "http://gitlab.com",
        WIKIPEDIA: "http://wikipedia.org",
        MAP: "http://openstreetmap.org",
        HOMEPAGE: "http://homepage.com",
    }

elif DATASET == "visualwebarena":
    REDDIT = os.environ.get("REDDIT", "")
    SHOPPING = os.environ.get("SHOPPING", "")
    WIKIPEDIA = os.environ.get("WIKIPEDIA", "")
    HOMEPAGE = os.environ.get("HOMEPAGE", "")
    CLASSIFIEDS = os.environ.get("CLASSIFIEDS", "")
    CLASSIFIEDS_RESET_TOKEN = os.environ.get("CLASSIFIEDS_RESET_TOKEN", "")
    # Read here but not part of the required-settings assertion below.
    REDDIT_RESET_URL = os.environ.get("REDDIT_RESET_URL", "")

    assert (
        REDDIT
        and SHOPPING
        and WIKIPEDIA
        and HOMEPAGE
        and CLASSIFIEDS
        and CLASSIFIEDS_RESET_TOKEN
    ), (
        # One setting per line, matching the webarena branch; the fields were
        # previously concatenated with no separator and hard to read.
        f"Please setup the URLs and tokens to each site. Current: \n"
        + f"Reddit: {REDDIT}\n"
        + f"Shopping: {SHOPPING}\n"
        + f"Wikipedia: {WIKIPEDIA}\n"
        + f"Homepage: {HOMEPAGE}\n"
        + f"Classifieds: {CLASSIFIEDS}\n"
        + f"Classifieds reset token: {CLASSIFIEDS_RESET_TOKEN}\n"
    )

    URL_MAPPINGS = {
        REDDIT: "http://reddit.com",
        SHOPPING: "http://onestopmarket.com",
        WIKIPEDIA: "http://wikipedia.org",
        HOMEPAGE: "http://homepage.com",
        CLASSIFIEDS: "http://classifieds.com",
    }

else:
    raise ValueError(f"Dataset not implemented: {DATASET}")
|
||||||
|
|
||||||
|
|
||||||
|
# Login credentials for each site, keyed by site name.
ACCOUNTS = dict(
    reddit=dict(username="MarvelsGrantMan136", password="test1234"),
    shopping=dict(username="emma.lopez@gmail.com", password="Password.123"),
    classifieds=dict(username="blake.sullivan@gmail.com", password="Password.123"),
    shopping_site_admin=dict(username="admin", password="admin1234"),
    shopping_admin=dict(username="admin", password="admin1234"),
    gitlab=dict(username="byteblaze", password="hello1234"),
)
|
|
@ -94,6 +94,7 @@ class ScriptBrowserEnv(Env[dict[str, Observation], Action]):
|
||||||
save_trace_enabled: bool = False,
|
save_trace_enabled: bool = False,
|
||||||
sleep_after_execution: float = 0.0,
|
sleep_after_execution: float = 0.0,
|
||||||
captioning_fn=None,
|
captioning_fn=None,
|
||||||
|
proxy_url: str = "",
|
||||||
):
|
):
|
||||||
# TODO: make Space[Action] = ActionSpace
|
# TODO: make Space[Action] = ActionSpace
|
||||||
self.action_space = get_action_space() # type: ignore[assignment]
|
self.action_space = get_action_space() # type: ignore[assignment]
|
||||||
|
@ -104,6 +105,7 @@ class ScriptBrowserEnv(Env[dict[str, Observation], Action]):
|
||||||
self.viewport_size = viewport_size
|
self.viewport_size = viewport_size
|
||||||
self.save_trace_enabled = save_trace_enabled
|
self.save_trace_enabled = save_trace_enabled
|
||||||
self.sleep_after_execution = sleep_after_execution
|
self.sleep_after_execution = sleep_after_execution
|
||||||
|
self.proxy_url = proxy_url
|
||||||
|
|
||||||
match observation_type:
|
match observation_type:
|
||||||
case "html" | "accessibility_tree" | "accessibility_tree_with_captioner" | "webrl":
|
case "html" | "accessibility_tree" | "accessibility_tree_with_captioner" | "webrl":
|
||||||
|
@ -187,6 +189,12 @@ class ScriptBrowserEnv(Env[dict[str, Observation], Action]):
|
||||||
storage_state=storage_state,
|
storage_state=storage_state,
|
||||||
geolocation=geolocation,
|
geolocation=geolocation,
|
||||||
device_scale_factor=1,
|
device_scale_factor=1,
|
||||||
|
proxy={
|
||||||
|
"server": self.proxy_url,
|
||||||
|
"bypass": "127.0.0.1,localhost",
|
||||||
|
}
|
||||||
|
if self.proxy_url
|
||||||
|
else None,
|
||||||
)
|
)
|
||||||
if self.save_trace_enabled:
|
if self.save_trace_enabled:
|
||||||
self.context.tracing.start(screenshots=True, snapshots=True)
|
self.context.tracing.start(screenshots=True, snapshots=True)
|
192
VAB-WebArena-Lite/browser_env/javascript/frame_mark_elements.js
Normal file
|
@ -0,0 +1,192 @@
|
||||||
|
/**
|
||||||
|
* Go through all DOM elements in the frame (including shadowDOMs), give them unique browsergym
|
||||||
|
* identifiers (bid), and store custom data in the aria-roledescription attribute.
|
||||||
|
*/
|
||||||
|
var { innerWidth: windowWidth, innerHeight: windowHeight } = window;
|
||||||
|
var scrollX = window.scrollX || document.documentElement.scrollLeft;
|
||||||
|
var scrollY = window.scrollY || document.documentElement.scrollTop;
|
||||||
|
|
||||||
|
/**
 * Mark every standard HTML element of the current frame (shadow DOM included).
 *
 * For each element this mirrors its live `value` / `checked` state into
 * attributes, assigns or reuses a bid stored in `bid_attr_name`, writes the
 * `browsergym_center`, `browsergym_bounding_box` and
 * `browsergym_is_in_viewport` attributes, and prefixes `aria-roledescription`
 * with `bid_left_top_centerX_centerY_right_bottom_inViewport_`.
 *
 * Arguments (one array): parent_bid (bid prefix, "" for none), bid_attr_name,
 * iframe_position ({x, y} added to every coordinate), super_iframe_offset
 * (visible-area rectangle of the enclosing frame, or null).
 *
 * Returns the visible-area rectangle {x, y, right, bottom} computed for this
 * frame.
 *
 * Every local is declared explicitly. The previous version assigned to
 * undeclared names (`i`, `x`, `y`, `right`, `bottom`, `iframe_offset`, ...),
 * which created or overwrote globals on the page being observed.
 */
([parent_bid, bid_attr_name, iframe_position, super_iframe_offset]) => {

    // standard html tags
    // https://www.w3schools.com/tags/
    const html_tags = [
        "a", "abbr", "acronym", "address", "applet", "area", "article", "aside", "audio",
        "b", "base", "basefont", "bdi", "bdo", "big", "blockquote", "body", "br", "button",
        "canvas", "caption", "center", "cite", "code", "col", "colgroup", "data", "datalist",
        "dd", "del", "details", "dfn", "dialog", "dir", "div", "dl", "dt", "em", "embed",
        "fieldset", "figcaption", "figure", "font", "footer", "form", "frame", "frameset",
        "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html", "i",
        "iframe", "img", "input", "ins", "kbd", "label", "legend", "li", "link", "main",
        "map", "mark", "menu", "meta", "meter", "nav", "noframes", "noscript", "object",
        "ol", "optgroup", "option", "output", "p", "param", "picture", "pre", "progress",
        "q", "rp", "rt", "ruby", "s", "samp", "script", "search", "section", "select",
        "small", "source", "span", "strike", "strong", "style", "sub", "summary", "sup",
        "svg", "table", "tbody", "td", "template", "textarea", "tfoot", "th", "thead",
        "time", "title", "tr", "track", "tt", "u", "ul", "var", "video", "wbr"
    ];

    let iframe_offset;
    if (super_iframe_offset == null) {
        // NOTE(review): element rects below come from getBoundingClientRect(),
        // which is viewport-relative, while x/y here are scroll offsets --
        // confirm this is the intended origin for the top-level frame.
        iframe_offset = { x: scrollX, y: scrollY, right: windowWidth, bottom: windowHeight };
    }
    else {
        // Clip this frame's visible area against the enclosing frame's.
        const super_right = super_iframe_offset["right"];
        const super_bottom = super_iframe_offset["bottom"];

        const x = Math.max(-iframe_position.x, 0);
        const y = Math.max(-iframe_position.y, 0);
        const right = Math.min(super_right, windowWidth, super_right - iframe_position.x);
        const bottom = Math.min(super_bottom, windowHeight, super_bottom - iframe_position.y);
        iframe_offset = { x: x, y: y, right: right, bottom: bottom };
    }

    let browsergym_first_visit = false;
    // if not yet set, set the frame (local) element counter to 0
    if (!("browsergym_frame_elem_counter" in window)) {
        window.browsergym_frame_elem_counter = 0;
        browsergym_first_visit = true;
    }

    // get all DOM elements in the current frame (does not include elements in shadowDOMs)
    let elements = Array.from(document.querySelectorAll('*'));
    let i = 0;
    while (i < elements.length) {
        const elem = elements[i];
        // add shadowDOM elements to the elements array, in such a way that order is preserved
        // TODO: do we really need the order preserved?
        if (elem.shadowRoot !== null) {
            elements = [
                ...elements.slice(0, i + 1),
                ...Array.from(elem.shadowRoot.querySelectorAll("*")),
                ...elements.slice(i + 1)
            ];
        }
        i++;
        // we will mark only standard HTML tags
        if (!elem.tagName || !html_tags.includes(elem.tagName.toLowerCase())) {
            continue; // stop and move on to the next element
        }
        // write dynamic element values to the DOM
        if (typeof elem.value !== 'undefined') {
            elem.setAttribute("value", elem.value);
        }
        // write dynamic checked properties to the DOM
        if (typeof elem.checked !== 'undefined') {
            if (elem.checked === true) {
                elem.setAttribute("checked", "");
            }
            else {
                elem.removeAttribute("checked");
            }
        }
        // add the element global id to a custom HTML attribute
        // https://playwright.dev/docs/locators#locate-by-test-id
        // recover the element id if it has one already, else compute a new element id
        let elem_global_bid;
        if (elem.hasAttribute(bid_attr_name)) {
            // throw an error if the attribute is already set while this is the first visit of the page
            if (browsergym_first_visit) {
                throw new Error(`Attribute ${bid_attr_name} already used in element ${elem.outerHTML}`);
            }
            elem_global_bid = elem.getAttribute(bid_attr_name);
        }
        else {
            const elem_local_id = window.browsergym_frame_elem_counter++;
            if (parent_bid == "") {
                elem_global_bid = `${elem_local_id}`;
            }
            else {
                elem_global_bid = `${parent_bid}-${elem_local_id}`;
            }
            elem.setAttribute(bid_attr_name, `${elem_global_bid}`);
        }

        // Hack: store custom data inside the aria-roledescription attribute (will be available in DOM and AXTree)
        // - elem_global_bid: global element identifier (unique over multiple frames)
        // - bounding box, center and in-viewport flag, offset by iframe_position
        const [rect, is_in_viewport] = getElementPositionInfo(elem, iframe_offset, iframe_position);
        const left = (rect.left + iframe_position.x).toString();
        const top = (rect.top + iframe_position.y).toString();
        const right = (rect.right + iframe_position.x).toString();
        const bottom = (rect.bottom + iframe_position.y).toString();
        const center_x = ((rect.left + rect.right) / 2 + iframe_position.x).toString();
        const center_y = ((rect.top + rect.bottom) / 2 + iframe_position.y).toString();

        elem.setAttribute("browsergym_center", `(${center_x}, ${center_y})`);
        elem.setAttribute("browsergym_bounding_box", `(${left}, ${top}, ${right}, ${bottom})`);
        elem.setAttribute("browsergym_is_in_viewport", `${is_in_viewport}`);

        // keep whatever the page had put in aria-roledescription as the trailing field
        let original_content = "";
        if (elem.hasAttribute("aria-roledescription")) {
            original_content = elem.getAttribute("aria-roledescription");
        }
        const new_content = `${elem_global_bid}_${left}_${top}_${center_x}_${center_y}_${right}_${bottom}_${is_in_viewport}_${original_content}`;
        elem.setAttribute("aria-roledescription", new_content);
    }
    return iframe_offset;
}
|
||||||
|
/**
 * Return `[rect, flag]` for an element.
 *
 * `rect` is the element's getBoundingClientRect(). `flag` is 1 when the
 * element's center lies inside `iframe_offset` ({x, y, right, bottom}), the
 * element has non-zero offsetWidth/offsetHeight (or does not expose them),
 * and IsInFront(element) is true; otherwise 0.
 *
 * `iframe_position` is accepted for signature compatibility but not used.
 *
 * The previous version also walked every ancestor to build a clipping
 * rectangle for a "not behind parent" check that was marked broken and never
 * read. That walk assigned an undeclared `parent_iframe_offset` (a leaked
 * page global) and cost one layout read per ancestor per element, so it has
 * been removed.
 */
function getElementPositionInfo(element, iframe_offset, iframe_position) {
    const rect = element.getBoundingClientRect();
    const x = (rect.left + rect.right) / 2;
    const y = (rect.top + rect.bottom) / 2;

    const is_in_viewport = (
        x >= iframe_offset["x"] &&
        y >= iframe_offset["y"] &&
        x <= iframe_offset["right"] &&
        y <= iframe_offset["bottom"]
    );

    // Elements without offsetWidth/offsetHeight are treated as visible.
    const isVisible = (typeof element.offsetWidth === 'undefined' || typeof element.offsetHeight === 'undefined') || (element.offsetWidth > 0 && element.offsetHeight > 0);

    // 1 only if the element is in the viewport, has non-zero dimensions and is the topmost element at its center
    return [rect, (is_in_viewport && isVisible && IsInFront(element)) ? 1 : 0];
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * True when `element` itself is the topmost element at the center of its own
 * bounding box, as reported by elementFromPoint(); false otherwise.
 */
function IsInFront(element) {
    const box = element.getBoundingClientRect();
    const centerX = (box.left + box.right) / 2;
    const centerY = (box.top + box.bottom) / 2;
    // element in the foreground at the center point (may be null)
    const topmost = elementFromPoint(centerX, centerY);
    return Boolean(topmost) && topmost === element;
}
|
||||||
|
|
||||||
|
/**
 * Like document.elementFromPoint(), but descends into shadow roots: as long
 * as the current hit has a shadow root that reports a different element at
 * (x, y), that element becomes the current hit. Returns the deepest hit, or
 * whatever document.elementFromPoint() returned if nothing was hit.
 */
function elementFromPoint(x, y) {
    let current = document.elementFromPoint(x, y);
    for (;;) {
        const inner = current?.shadowRoot?.elementFromPoint(x, y);
        // no shadow root / nothing inside it: current is the deepest hit
        if (!inner) {
            return current;
        }
        // the shadow root reports its own host: stop descending
        if (inner === current) {
            return inner;
        }
        current = inner;
    }
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
/**
 * Go through all DOM elements in the frame (including shadowDOMs),
 * and cleanup previously stored data in the aria-roledescription attribute.
 *
 * The stored data is expected to be a prefix of `n_data_items` items, each
 * terminated by "_", placed in front of the attribute's original content.
 * The prefix is stripped; if nothing remains the attribute is removed.
 * Attributes that do not contain the full prefix are left untouched.
 */
() => {
    // bid, bbox_left, bbox_top, center_x, center_y, bbox_right, bbox_bottom, is_in_viewport
    // TODO: handle more data if needed
    const n_data_items = 8;

    // get all DOM elements in the current frame (does not include elements in shadowDOMs)
    const elements = Array.from(document.querySelectorAll('*'));
    // elements grows while we iterate, so re-read its length on every pass
    for (let i = 0; i < elements.length; i++) {
        const elem = elements[i];
        // add shadowDOM elements right after their host, so that order is preserved
        // TODO: do we really need the order preserved?
        if (elem.shadowRoot) {
            // in-place insert: avoids re-spreading the whole array for every shadow host
            elements.splice(i + 1, 0, ...elem.shadowRoot.querySelectorAll("*"));
        }
        // Hack: remove custom data stored inside the aria-roledescription tag
        // - elem_global_id: global browsergym identifier
        if (!elem.hasAttribute("aria-roledescription")) {
            continue;
        }
        const content = elem.getAttribute("aria-roledescription");
        let post_data_index = 0;
        let has_full_prefix = true;
        for (let j = 0; j < n_data_items; j++) {
            const separator_index = content.indexOf("_", post_data_index);
            if (separator_index === -1) {
                // fewer separators than expected: this is not our data, and
                // continuing would wrap the index back to 0 and cut the
                // value at an arbitrary position
                has_full_prefix = false;
                break;
            }
            post_data_index = separator_index + 1;
        }
        if (!has_full_prefix) {
            continue;
        }
        // declared locally (was an implicit global, which throws in strict mode)
        const original_content = content.substring(post_data_index);
        if (original_content) {
            elem.setAttribute("aria-roledescription", original_content);
        }
        else {
            elem.removeAttribute("aria-roledescription");
        }
    }
}
|
|
@ -1114,7 +1114,7 @@ class ImageObservationProcessor(ObservationProcessor):
|
||||||
try:
|
try:
|
||||||
browser_info = self.fetch_browser_info(page)
|
browser_info = self.fetch_browser_info(page)
|
||||||
except Exception:
|
except Exception:
|
||||||
page.wait_for_load_state("load", timeout=500)
|
page.wait_for_load_state("load", timeout=30000) # 500->30000, modified by yuyr
|
||||||
browser_info = self.fetch_browser_info(page)
|
browser_info = self.fetch_browser_info(page)
|
||||||
|
|
||||||
self.browser_config = browser_info["config"]
|
self.browser_config = browser_info["config"]
|
0
VAB-WebArena-Lite/browser_env/py.typed
Normal file
6
VAB-WebArena-Lite/browser_env/trajectory.py
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
from .actions import Action
|
||||||
|
from .utils import StateInfo
|
||||||
|
|
||||||
|
# A trajectory is a list whose entries are StateInfo or Action values.
Trajectory = list[Union[StateInfo, Action]]
|
106
VAB-WebArena-Lite/browser_env/utils.py
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
import base64
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from io import BytesIO
|
||||||
|
from typing import Any, Dict, TypedDict, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import numpy.typing as npt
|
||||||
|
from beartype import beartype
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
# vertexai is treated as an optional dependency: if the import fails the module
# still loads and only prints a notice, but VertexImage stays undefined.
try:
    from vertexai.preview.generative_models import Image as VertexImage
except Exception:  # not a bare `except:`, so KeyboardInterrupt / SystemExit still propagate
    print('Google Cloud not set up, skipping import of vertexai.preview.generative_models.Image')
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DetachedPage:
    """Dataclass holding a page's URL together with its content, both as strings."""

    url: str
    content: str  # html
|
||||||
|
|
||||||
|
|
||||||
|
@beartype
def png_bytes_to_numpy(png: bytes) -> npt.NDArray[np.uint8]:
    """Decode image bytes into a numpy array.

    The bytes are wrapped in an in-memory stream, opened with PIL, and the
    resulting image is converted with ``np.array``.

    Args:
        png: encoded image bytes (PNG, per the function name).

    Returns:
        The decoded image as a numpy array.

    Example:

    >>> fig = go.Figure(go.Scatter(x=[1], y=[1]))
    >>> plt.imshow(png_bytes_to_numpy(fig.to_image('png')))
    """
    stream = BytesIO(png)
    decoded = Image.open(stream)
    return np.array(decoded)
|
||||||
|
|
||||||
|
|
||||||
|
def pil_to_b64(img: Image.Image) -> str:
    """Serialize an image to PNG and return it as a base64 data URL.

    Args:
        img: image object; its ``save(buffer, format="PNG")`` method is used.

    Returns:
        A string of the form ``data:image/png;base64,<payload>``.
    """
    with BytesIO() as png_stream:
        img.save(png_stream, format="PNG")
        payload = base64.b64encode(png_stream.getvalue())
    return "data:image/png;base64," + payload.decode("utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def pil_to_vertex(img: Image.Image) -> "VertexImage":
    """Serialize an image to PNG and wrap the bytes in a ``VertexImage``.

    Args:
        img: image object; its ``save(buffer, format="PNG")`` method is used.

    Returns:
        The object produced by ``VertexImage.from_bytes`` for the PNG bytes
        (not a ``str``, despite the previous annotation).

    Raises:
        NameError: if ``VertexImage`` is not defined because the optional
            ``vertexai`` import was skipped when this module was loaded.
    """
    # Resolve the optional dependency first so a missing import fails fast,
    # with an actionable message, before any encoding work is done.
    try:
        vertex_image_cls = VertexImage
    except NameError:
        raise NameError(
            "pil_to_vertex requires vertexai.preview.generative_models.Image, "
            "but that import was skipped when this module was loaded"
        ) from None
    with BytesIO() as image_buffer:
        img.save(image_buffer, format="PNG")
        byte_data = image_buffer.getvalue()
    return vertex_image_cls.from_bytes(byte_data)
|
||||||
|
|
||||||
|
|
||||||
|
class DOMNode(TypedDict):
    """Typed dictionary describing one DOM node record.

    NOTE(review): the key names look like they mirror a browser DOM snapshot
    format; confirm against the code that builds these dicts.
    """

    nodeId: str
    nodeType: str
    nodeName: str
    nodeValue: str
    attributes: str
    backendNodeId: str
    parentId: str
    childIds: list[str]
    cursor: int
    # The two entries below may be None; presumably when no geometry is
    # available for the node — TODO confirm.
    union_bound: list[float] | None
    center: list[float] | None
|
||||||
|
|
||||||
|
|
||||||
|
class AccessibilityTreeNode(TypedDict):
    """Typed dictionary describing one accessibility-tree node record.

    NOTE(review): the key names look like they mirror a browser accessibility
    tree format; confirm against the code that builds these dicts.
    """

    nodeId: str
    ignored: bool
    role: dict[str, Any]
    chromeRole: dict[str, Any]
    name: dict[str, Any]
    properties: list[dict[str, Any]]
    childIds: list[str]
    parentId: str
    backendDOMNodeId: int
    frameId: str
    # The geometry entries below may be None; presumably when no bounding
    # information is available for the node — TODO confirm.
    bound: list[float] | None
    union_bound: list[float] | None
    offsetrect_bound: list[float] | None
    center: list[float] | None
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserConfig(TypedDict):
    """Typed dictionary of float-valued browser window settings.

    NOTE(review): units and coordinate origin of the bounds are not visible
    here; confirm against the code that fills this dict.
    """

    win_upper_bound: float
    win_left_bound: float
    win_width: float
    win_height: float
    win_right_bound: float
    win_lower_bound: float
    device_pixel_ratio: float
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserInfo(TypedDict):
    """Typed dictionary bundling a DOM tree payload with a BrowserConfig."""

    # Note: typed as a plain dict here, unlike the module-level DOMTree alias
    # (which is a list of DOMNode).
    DOMTree: dict[str, Any]
    config: BrowserConfig
|
||||||
|
|
||||||
|
|
||||||
|
# An accessibility tree is a list of AccessibilityTreeNode dicts.
AccessibilityTree = list[AccessibilityTreeNode]
# A DOM tree is a list of DOMNode dicts.
DOMTree = list[DOMNode]

# An observation is either a string or a uint8 numpy array.
Observation = str | npt.NDArray[np.uint8]
|
||||||
|
|
||||||
|
|
||||||
|
class StateInfo(TypedDict):
    """Typed dictionary pairing named observations with a free-form info dict."""

    # Maps a string key to an Observation (a string or a uint8 numpy array).
    observation: dict[str, Observation]
    # Arbitrary additional data keyed by string.
    info: Dict[str, Any]
|
BIN
VAB-WebArena-Lite/coco_images/000000000285.jpg
Normal file
After Width: | Height: | Size: 328 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000022371.jpg
Normal file
After Width: | Height: | Size: 42 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000023781.jpg
Normal file
After Width: | Height: | Size: 150 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000024567.jpg
Normal file
After Width: | Height: | Size: 238 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000029675.jpg
Normal file
After Width: | Height: | Size: 194 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000029984.jpg
Normal file
After Width: | Height: | Size: 180 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000039551.jpg
Normal file
After Width: | Height: | Size: 150 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000048153.jpg
Normal file
After Width: | Height: | Size: 225 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000048396.jpg
Normal file
After Width: | Height: | Size: 145 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000048924.jpg
Normal file
After Width: | Height: | Size: 229 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000050331.jpg
Normal file
After Width: | Height: | Size: 161 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000058539.jpg
Normal file
After Width: | Height: | Size: 139 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000058705.jpg
Normal file
After Width: | Height: | Size: 251 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000060899.jpg
Normal file
After Width: | Height: | Size: 136 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000064574.jpg
Normal file
After Width: | Height: | Size: 119 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000064718.jpg
Normal file
After Width: | Height: | Size: 191 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000066771.jpg
Normal file
After Width: | Height: | Size: 170 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000084477.jpg
Normal file
After Width: | Height: | Size: 153 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000097278.jpg
Normal file
After Width: | Height: | Size: 103 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000102805.jpg
Normal file
After Width: | Height: | Size: 172 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000136772.jpg
Normal file
After Width: | Height: | Size: 83 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000161032.jpg
Normal file
After Width: | Height: | Size: 208 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000162415.jpg
Normal file
After Width: | Height: | Size: 109 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000163257.jpg
Normal file
After Width: | Height: | Size: 133 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000163682.jpg
Normal file
After Width: | Height: | Size: 680 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000166768.jpg
Normal file
After Width: | Height: | Size: 125 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000167240.jpg
Normal file
After Width: | Height: | Size: 147 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000167353.jpg
Normal file
After Width: | Height: | Size: 150 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000172595.jpg
Normal file
After Width: | Height: | Size: 122 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000173008.jpg
Normal file
After Width: | Height: | Size: 119 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000175443.jpg
Normal file
After Width: | Height: | Size: 147 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000176232.jpg
Normal file
After Width: | Height: | Size: 118 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000286708.jpg
Normal file
After Width: | Height: | Size: 114 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000324258.jpg
Normal file
After Width: | Height: | Size: 96 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000324715.jpg
Normal file
After Width: | Height: | Size: 119 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000327601.jpg
Normal file
After Width: | Height: | Size: 92 KiB |
BIN
VAB-WebArena-Lite/coco_images/000000329041.jpg
Normal file
After Width: | Height: | Size: 127 KiB |