增加环境变量指向g14部署webarena环境;
map网站使用socks5连接到官网openstreetmap; 模型改成gpt-4o; llm fuzzy match也使用4o; 增加generate_test_data.py脚本
This commit is contained in:
parent
c078ba6292
commit
bb7393bcb3
11
.gitignore
vendored
Normal file
11
.gitignore
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
config_files/tasks/
|
||||
|
||||
*.pyc
|
||||
|
||||
|
||||
.auth/*
|
||||
|
||||
trash-*.txt
|
||||
|
||||
|
||||
.env
|
|
@ -1,7 +1,7 @@
|
|||
logging: True
|
||||
verbose: 1
|
||||
debug: False
|
||||
logdir: "../AgentOccam-Trajectories"
|
||||
logdir: "../AgentOccam-Trajectories-shopping-admin"
|
||||
logname: "AgentOccam"
|
||||
max_steps: 20
|
||||
agent:
|
||||
|
@ -16,7 +16,7 @@ agent:
|
|||
debug: 0
|
||||
verbose: 1
|
||||
number: 1
|
||||
model: "gpt-4-turbo"
|
||||
model: "gpt-4o"
|
||||
documented_interaction_elements: ["url", "plan", "reason", "observation summary", "retained element ids", "observation highlight"]
|
||||
online_interaction_elements: []
|
||||
input: ["step", "objective", "previous plans", "interaction history", "current observation"]
|
||||
|
@ -35,7 +35,7 @@ agent:
|
|||
mode: false
|
||||
debug: 0
|
||||
verbose: 1
|
||||
model: "gpt-4-turbo"
|
||||
model: "gpt-4o-mini"
|
||||
documented_interaction_elements: []
|
||||
online_interaction_elements: []
|
||||
character: "normal"
|
||||
|
@ -52,7 +52,7 @@ agent:
|
|||
mode: false
|
||||
debug: 0
|
||||
verbose: 1
|
||||
model: "gpt-4-turbo"
|
||||
model: "gpt-4o-mini"
|
||||
documented_interaction_elements: []
|
||||
online_interaction_elements: []
|
||||
strict: false
|
||||
|
@ -70,7 +70,11 @@ env:
|
|||
prune: true
|
||||
max_browser_rows: 500
|
||||
headless: True
|
||||
task_ids: ["Allrecipes--3", 65]
|
||||
proxy_url: "socks5://104.248.187.88:1001"
|
||||
# task_ids: ["Allrecipes--3", 65]
|
||||
# task_ids: [7]
|
||||
task_ids: [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||
|
||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||
# c
|
||||
|
|
|
@ -15,14 +15,15 @@ from AgentOccam.obs_opt import (
|
|||
|
||||
|
||||
class WebArenaEnvironmentWrapper():
|
||||
def __init__(self, config_file, max_browser_rows=300, max_steps=50, slow_mo=1, observation_type="accessibility_tree", current_viewport_only=False, viewport_size={"width": 1280, "height": 720}, headless=False, global_config=None):
|
||||
def __init__(self, config_file, max_browser_rows=300, max_steps=50, slow_mo=1, observation_type="accessibility_tree", current_viewport_only=False, viewport_size={"width": 1280, "height": 720}, headless=False, global_config=None, proxy_url=""):
|
||||
self.webarena_env = ScriptBrowserEnv(
|
||||
headless=headless,
|
||||
slow_mo=slow_mo,
|
||||
observation_type=observation_type,
|
||||
current_viewport_only=current_viewport_only,
|
||||
viewport_size=viewport_size,
|
||||
global_config=global_config
|
||||
global_config=global_config,
|
||||
proxy_url=proxy_url
|
||||
)
|
||||
self.config_file = config_file
|
||||
with open(self.config_file, "r") as f:
|
||||
|
|
|
@ -87,6 +87,7 @@ class ScriptBrowserEnv(Env[dict[str, Observation], Action]):
|
|||
save_trace_enabled: bool = False,
|
||||
sleep_after_execution: float = 5.0,
|
||||
global_config = None,
|
||||
proxy_url: str = "",
|
||||
):
|
||||
# TODO: make Space[Action] = ActionSpace
|
||||
self.action_space = get_action_space() # type: ignore[assignment]
|
||||
|
@ -98,6 +99,9 @@ class ScriptBrowserEnv(Env[dict[str, Observation], Action]):
|
|||
self.save_trace_enabled = save_trace_enabled
|
||||
self.sleep_after_execution = sleep_after_execution
|
||||
self.global_config = global_config
|
||||
self.proxy_url = proxy_url
|
||||
|
||||
print(f"ScriptBrowserEnv proxy_url: {self.proxy_url}")
|
||||
|
||||
match observation_type:
|
||||
case "html" | "accessibility_tree":
|
||||
|
@ -151,6 +155,12 @@ class ScriptBrowserEnv(Env[dict[str, Observation], Action]):
|
|||
storage_state=storage_state,
|
||||
geolocation=geolocation,
|
||||
device_scale_factor=1,
|
||||
proxy={
|
||||
"server": self.proxy_url,
|
||||
"bypass": "127.0.0.1,localhost",
|
||||
}
|
||||
if self.proxy_url
|
||||
else None,
|
||||
)
|
||||
if self.save_trace_enabled:
|
||||
self.context.tracing.start(screenshots=True, snapshots=True)
|
||||
|
@ -165,7 +175,7 @@ class ScriptBrowserEnv(Env[dict[str, Observation], Action]):
|
|||
if self.text_observation_type == "accessibility_tree":
|
||||
client.send("Accessibility.enable")
|
||||
page.client = client # type: ignore # TODO[shuyanzh], fix this hackey client
|
||||
page.goto(url)
|
||||
page.goto(url, timeout=10000)
|
||||
# set the first page as the current page
|
||||
self.page = self.context.pages[0]
|
||||
self.page.bring_to_front()
|
||||
|
|
14
env.sh
Normal file
14
env.sh
Normal file
|
@ -0,0 +1,14 @@
|
|||
webarena_server_address="localhost"
|
||||
|
||||
export SHOPPING="http://${webarena_server_address}:28082"
|
||||
export SHOPPING_ADMIN="http://${webarena_server_address}:28083/admin"
|
||||
export REDDIT="http://${webarena_server_address}:28080"
|
||||
export GITLAB="http://${webarena_server_address}:28084"
|
||||
export MAP="https://www.openstreetmap.org"
|
||||
export WIKIPEDIA="http://${webarena_server_address}:28081/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
|
||||
export HOMEPAGE="http://${webarena_server_address}:20080"
|
||||
|
||||
export OPENAI_API_KEY="sk-xxx"
|
||||
export OPENAI_BASE_URL="https://aiproxy.lmzgc.cn:8080/v1"
|
||||
|
||||
export GEMINI_API_KEY="AIzaSyBwE1234567890" # Optional, we provide several other agent base models, such as Claude and LLaMa.
|
|
@ -102,7 +102,8 @@ def run():
|
|||
current_viewport_only=current_viewport_only,
|
||||
viewport_size={"width": 1920, "height": 1080},
|
||||
headless=config.env.headless,
|
||||
global_config=config)
|
||||
global_config=config,
|
||||
proxy_url=config.env.proxy_url)
|
||||
|
||||
agent = agent_init()
|
||||
objective = env.get_objective()
|
||||
|
|
|
@ -11,7 +11,7 @@ from typing import Any, Tuple, Union, Optional
|
|||
|
||||
from beartype import beartype
|
||||
import nltk
|
||||
nltk.download('punkt')
|
||||
# nltk.download('punkt') # NOTE: you need to download the punkt model first
|
||||
from nltk.tokenize import word_tokenize # type: ignore
|
||||
|
||||
from playwright.sync_api import CDPSession, Page
|
||||
|
|
|
@ -158,7 +158,7 @@ def llm_fuzzy_match(pred: str, reference: str, question: str) -> float:
|
|||
{"role": "user", "content": message},
|
||||
]
|
||||
response = generate_from_openai_chat_completion(
|
||||
model="gpt-4-1106-preview",
|
||||
model="gpt-4o",
|
||||
messages=messages,
|
||||
temperature=0,
|
||||
max_tokens=768,
|
||||
|
@ -193,7 +193,7 @@ def llm_ua_match(pred: str, reference: str, question: str) -> float:
|
|||
]
|
||||
|
||||
response = generate_from_openai_chat_completion(
|
||||
model="gpt-4-1106-preview",
|
||||
model="gpt-4o",
|
||||
messages=messages,
|
||||
temperature=0,
|
||||
max_tokens=768,
|
||||
|
|
18
play-AgentOccam.txt
Normal file
18
play-AgentOccam.txt
Normal file
|
@ -0,0 +1,18 @@
|
|||
STEP:
|
||||
7
|
||||
|
||||
OBJECTIVE:
|
||||
Add a new size XXS to blue and purple Nona Fitness Tank
|
||||
|
||||
PREVIOUS PLANS:
|
||||
[0] (Active Plan) Find the solution to "Add a new size XXS to blue and purple Nona Fitness Tank"
|
||||
|
||||
OBSERVATION DESCRIPTION:
|
||||
The current page is the 'Products / Inventory / Catalog' section of the Magento Admin Panel. It displays a list of products with various details such as Name, Type, SKU, and more. The Nona Fitness Tank is listed as a configurable product with an option to edit it. The page includes a search textbox and several buttons for actions like adding products, filters, and search. There is no direct option visible for adding new size attributes.
|
||||
|
||||
REASON:
|
||||
To add the XXS size, it is necessary to create or enable it as an attribute. Since the XXS size was not available in the previous configuration steps, I need to investigate how to add or enable this size attribute. The logical next step is to explore the 'Attributes' section within the 'STORES' menu, as this is where product attributes are typically managed.
|
||||
|
||||
ACTION:
|
||||
click [20159]
|
||||
|
27
scripts/generate_test_data.py
Normal file
27
scripts/generate_test_data.py
Normal file
|
@ -0,0 +1,27 @@
|
|||
"""Replace the website placeholders with website domains from env_config
|
||||
Generate the test data"""
|
||||
import json
|
||||
|
||||
from browser_env.env_config import *
|
||||
|
||||
|
||||
def main() -> None:
    """Fill in the site placeholders of the raw task file and split it up.

    Reads ``config_files/test.raw.json``, replaces each ``__SITE__``
    placeholder with the corresponding URL constant, writes the combined
    result to ``config_files/tasks/test.json`` and then writes every task to
    ``config_files/tasks/<idx>.json``, where ``idx`` is the task's position
    in the top-level JSON list.

    All paths are relative to the current working directory.

    Raises:
        FileNotFoundError: if ``config_files/test.raw.json`` does not exist.
        json.JSONDecodeError: if the substituted text is not valid JSON.
    """
    # Local import keeps this block self-contained; the file only imports json.
    import os

    raw_path = "config_files/test.raw.json"
    out_dir = "config_files/tasks"

    # The URL constants are presumably supplied by the file's wildcard import
    # of browser_env.env_config -- verify there if a NameError shows up here.
    # Substitution order matches the original sequence of replace() calls.
    placeholders = {
        "__GITLAB__": GITLAB,
        "__REDDIT__": REDDIT,
        "__SHOPPING__": SHOPPING,
        "__SHOPPING_ADMIN__": SHOPPING_ADMIN,
        "__WIKIPEDIA__": WIKIPEDIA,
        "__MAP__": MAP,
    }

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(raw_path, "r", encoding="utf-8") as f:
        raw = f.read()

    for placeholder, url in placeholders.items():
        raw = raw.replace(placeholder, url)

    # The output directory may not exist yet (e.g. on a fresh checkout), and
    # open(..., "w") does not create missing parent directories.
    os.makedirs(out_dir, exist_ok=True)

    with open(os.path.join(out_dir, "test.json"), "w", encoding="utf-8") as f:
        f.write(raw)

    # Split into one file per task, named by list position.
    data = json.loads(raw)
    for idx, item in enumerate(data):
        task_path = os.path.join(out_dir, f"{idx}.json")
        with open(task_path, "w", encoding="utf-8") as f:
            json.dump(item, f, indent=2)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -1,11 +1,18 @@
|
|||
export SHOPPING="http://<webarena_server_address>:7770"
|
||||
export SHOPPING_ADMIN="http://<webarena_server_address>:7780/admin"
|
||||
export REDDIT="http://<webarena_server_address>:9999"
|
||||
export GITLAB="http://<webarena_server_address>:8023"
|
||||
export MAP="http://<webarena_server_address>:3000"
|
||||
export WIKIPEDIA="http://<webarena_server_address>:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
|
||||
export HOMEPAGE="http://<webarena_server_address>:4399"
|
||||
export OPENAI_API_KEY=<openai_api_key>
|
||||
conda activate webarena
|
||||
webarena_server_address="localhost"
|
||||
|
||||
export SHOPPING="http://${webarena_server_address}:28082"
|
||||
export SHOPPING_ADMIN="http://${webarena_server_address}:28083/admin"
|
||||
export REDDIT="http://${webarena_server_address}:28080"
|
||||
export GITLAB="http://${webarena_server_address}:28084"
|
||||
export MAP="https://www.openstreetmap.org"
|
||||
export WIKIPEDIA="http://${webarena_server_address}:28081/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
|
||||
export HOMEPAGE="http://${webarena_server_address}:20080"
|
||||
|
||||
export OPENAI_API_KEY="sk-xxx" # 改成你的key
|
||||
export OPENAI_BASE_URL="https://aiproxy.lmzgc.cn:8080/v1"
|
||||
|
||||
export GEMINI_API_KEY="AIzaSyBwE1234567890" # Optional, we provide several other agent base models, such as Claude and LLaMa.
|
||||
|
||||
|
||||
python browser_env/auto_login.py
|
||||
python eval_webarena.py --config AgentOccam/configs/AgentOccam.yml
|
||||
python eval_webarena.py --config AgentOccam/configs/AgentOccam.yml
|
||||
|
|
Loading…
Reference in New Issue
Block a user