diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..395e3c8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +config_files/tasks/ + +*.pyc + + +.auth/* + +trash-*.txt + + +.env \ No newline at end of file diff --git a/AgentOccam/configs/AgentOccam.yml b/AgentOccam/configs/AgentOccam.yml index 92ad07d..f2262d2 100644 --- a/AgentOccam/configs/AgentOccam.yml +++ b/AgentOccam/configs/AgentOccam.yml @@ -1,7 +1,7 @@ logging: True verbose: 1 debug: False -logdir: "../AgentOccam-Trajectories" +logdir: "../AgentOccam-Trajectories-shopping-admin" logname: "AgentOccam" max_steps: 20 agent: @@ -16,7 +16,7 @@ agent: debug: 0 verbose: 1 number: 1 - model: "gpt-4-turbo" + model: "gpt-4o" documented_interaction_elements: ["url", "plan", "reason", "observation summary", "retained element ids", "observation highlight"] online_interaction_elements: [] input: ["step", "objective", "previous plans", "interaction history", "current observation"] @@ -35,7 +35,7 @@ agent: mode: false debug: 0 verbose: 1 - model: "gpt-4-turbo" + model: "gpt-4o-mini" documented_interaction_elements: [] online_interaction_elements: [] character: "normal" @@ -52,7 +52,7 @@ agent: mode: false debug: 0 verbose: 1 - model: "gpt-4-turbo" + model: "gpt-4o-mini" documented_interaction_elements: [] online_interaction_elements: [] strict: false @@ -70,7 +70,11 @@ env: prune: true max_browser_rows: 500 headless: True - task_ids: ["Allrecipes--3", 65] + proxy_url: "socks5://104.248.187.88:1001" + # task_ids: ["Allrecipes--3", 65] + # task_ids: [7] + task_ids: [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 
458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790] + # a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790] # b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767] # c. 
"SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798] diff --git a/AgentOccam/env.py b/AgentOccam/env.py index 7d27a49..c4345c8 100644 --- a/AgentOccam/env.py +++ b/AgentOccam/env.py @@ -15,14 +15,15 @@ from AgentOccam.obs_opt import ( class WebArenaEnvironmentWrapper(): - def __init__(self, config_file, max_browser_rows=300, max_steps=50, slow_mo=1, observation_type="accessibility_tree", current_viewport_only=False, viewport_size={"width": 1280, "height": 720}, headless=False, global_config=None): + def __init__(self, config_file, max_browser_rows=300, max_steps=50, slow_mo=1, observation_type="accessibility_tree", current_viewport_only=False, viewport_size={"width": 1280, "height": 720}, headless=False, global_config=None, proxy_url=""): self.webarena_env = ScriptBrowserEnv( headless=headless, slow_mo=slow_mo, observation_type=observation_type, current_viewport_only=current_viewport_only, viewport_size=viewport_size, - global_config=global_config + global_config=global_config, + proxy_url=proxy_url ) self.config_file = config_file with open(self.config_file, "r") as f: diff 
--git a/browser_env/envs.py b/browser_env/envs.py index 336ca0f..7fce66d 100644 --- a/browser_env/envs.py +++ b/browser_env/envs.py @@ -87,6 +87,7 @@ class ScriptBrowserEnv(Env[dict[str, Observation], Action]): save_trace_enabled: bool = False, sleep_after_execution: float = 5.0, global_config = None, + proxy_url: str = "", ): # TODO: make Space[Action] = ActionSpace self.action_space = get_action_space() # type: ignore[assignment] @@ -98,6 +99,9 @@ class ScriptBrowserEnv(Env[dict[str, Observation], Action]): self.save_trace_enabled = save_trace_enabled self.sleep_after_execution = sleep_after_execution self.global_config = global_config + self.proxy_url = proxy_url + + print(f"ScriptBrowserEnv proxy_url: {self.proxy_url}") match observation_type: case "html" | "accessibility_tree": @@ -151,6 +155,12 @@ class ScriptBrowserEnv(Env[dict[str, Observation], Action]): storage_state=storage_state, geolocation=geolocation, device_scale_factor=1, + proxy={ + "server": self.proxy_url, + "bypass": "127.0.0.1,localhost", + } + if self.proxy_url + else None, ) if self.save_trace_enabled: self.context.tracing.start(screenshots=True, snapshots=True) @@ -165,7 +175,7 @@ class ScriptBrowserEnv(Env[dict[str, Observation], Action]): if self.text_observation_type == "accessibility_tree": client.send("Accessibility.enable") page.client = client # type: ignore # TODO[shuyanzh], fix this hackey client - page.goto(url) + page.goto(url, timeout=10000) # set the first page as the current page self.page = self.context.pages[0] self.page.bring_to_front() diff --git a/env.sh b/env.sh new file mode 100644 index 0000000..ba9bc23 --- /dev/null +++ b/env.sh @@ -0,0 +1,14 @@ +webarena_server_address="localhost" + +export SHOPPING="http://${webarena_server_address}:28082" +export SHOPPING_ADMIN="http://${webarena_server_address}:28083/admin" +export REDDIT="http://${webarena_server_address}:28080" +export GITLAB="http://${webarena_server_address}:28084" +export MAP="https://www.openstreetmap.org" 
+export WIKIPEDIA="http://${webarena_server_address}:28081/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing" +export HOMEPAGE="http://${webarena_server_address}:20080" + +export OPENAI_API_KEY="sk-xxx" +export OPENAI_BASE_URL="https://aiproxy.lmzgc.cn:8080/v1" + +export GEMINI_API_KEY="AIzaSyBwE1234567890" # Optional, we provide several other agent base models, such as Claude and LLaMa. diff --git a/eval_webarena.py b/eval_webarena.py index 67051e5..b2f4912 100644 --- a/eval_webarena.py +++ b/eval_webarena.py @@ -102,7 +102,8 @@ def run(): current_viewport_only=current_viewport_only, viewport_size={"width": 1920, "height": 1080}, headless=config.env.headless, - global_config=config) + global_config=config, + proxy_url=config.env.proxy_url) agent = agent_init() objective = env.get_objective() diff --git a/evaluation_harness/evaluators.py b/evaluation_harness/evaluators.py index 527c9f9..7d8d0c3 100644 --- a/evaluation_harness/evaluators.py +++ b/evaluation_harness/evaluators.py @@ -11,7 +11,7 @@ from typing import Any, Tuple, Union, Optional from beartype import beartype import nltk -nltk.download('punkt') +# nltk.download('punkt') # NOTE: you need to download the punkt model first from nltk.tokenize import word_tokenize # type: ignore from playwright.sync_api import CDPSession, Page diff --git a/evaluation_harness/helper_functions.py b/evaluation_harness/helper_functions.py index d1a55cf..52a89af 100644 --- a/evaluation_harness/helper_functions.py +++ b/evaluation_harness/helper_functions.py @@ -158,7 +158,7 @@ def llm_fuzzy_match(pred: str, reference: str, question: str) -> float: {"role": "user", "content": message}, ] response = generate_from_openai_chat_completion( - model="gpt-4-1106-preview", + model="gpt-4o", messages=messages, temperature=0, max_tokens=768, @@ -193,7 +193,7 @@ def llm_ua_match(pred: str, reference: str, question: str) -> float: ] response = generate_from_openai_chat_completion( - model="gpt-4-1106-preview", + model="gpt-4o", 
messages=messages, temperature=0, max_tokens=768, diff --git a/play-AgentOccam.txt b/play-AgentOccam.txt new file mode 100644 index 0000000..20b0555 --- /dev/null +++ b/play-AgentOccam.txt @@ -0,0 +1,18 @@ +STEP: +7 + +OBJECTIVE: +Add a new size XXS to blue and purple Nona Fitness Tank + +PREVIOUS PLANS: +[0] (Active Plan) Find the solution to "Add a new size XXS to blue and purple Nona Fitness Tank" + +OBSERVATION DESCRIPTION: +The current page is the 'Products / Inventory / Catalog' section of the Magento Admin Panel. It displays a list of products with various details such as Name, Type, SKU, and more. The Nona Fitness Tank is listed as a configurable product with an option to edit it. The page includes a search textbox and several buttons for actions like adding products, filters, and search. There is no direct option visible for adding new size attributes. + +REASON: +To add the XXS size, it is necessary to create or enable it as an attribute. Since the XXS size was not available in the previous configuration steps, I need to investigate how to add or enable this size attribute. The logical next step is to explore the 'Attributes' section within the 'STORES' menu, as this is where product attributes are typically managed. 
+ +ACTION: +click [20159] + diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py new file mode 100644 index 0000000..3e6bbf8 --- /dev/null +++ b/scripts/generate_test_data.py @@ -0,0 +1,27 @@ +"""Replace the website placeholders with website domains from env_config +Generate the test data""" +import json + +from browser_env.env_config import * + + +def main() -> None: + with open("config_files/test.raw.json", "r") as f: + raw = f.read() + raw = raw.replace("__GITLAB__", GITLAB) + raw = raw.replace("__REDDIT__", REDDIT) + raw = raw.replace("__SHOPPING__", SHOPPING) + raw = raw.replace("__SHOPPING_ADMIN__", SHOPPING_ADMIN) + raw = raw.replace("__WIKIPEDIA__", WIKIPEDIA) + raw = raw.replace("__MAP__", MAP) + with open("config_files/tasks/test.json", "w") as f: + f.write(raw) + # split to multiple files + data = json.loads(raw) + for idx, item in enumerate(data): + with open(f"config_files/tasks/{idx}.json", "w") as f: + json.dump(item, f, indent=2) + + +if __name__ == "__main__": + main() diff --git a/scripts/run_config.sh b/scripts/run_config.sh index 44d095e..4b3f105 100644 --- a/scripts/run_config.sh +++ b/scripts/run_config.sh @@ -1,11 +1,18 @@ -export SHOPPING="http://:7770" -export SHOPPING_ADMIN="http://:7780/admin" -export REDDIT="http://:9999" -export GITLAB="http://:8023" -export MAP="http://:3000" -export WIKIPEDIA="http://:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing" -export HOMEPAGE="http://:4399" -export OPENAI_API_KEY= -conda activate webarena +webarena_server_address="localhost" + +export SHOPPING="http://${webarena_server_address}:28082" +export SHOPPING_ADMIN="http://${webarena_server_address}:28083/admin" +export REDDIT="http://${webarena_server_address}:28080" +export GITLAB="http://${webarena_server_address}:28084" +export MAP="https://www.openstreetmap.org" +export WIKIPEDIA="http://${webarena_server_address}:28081/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing" +export 
HOMEPAGE="http://${webarena_server_address}:20080" + +export OPENAI_API_KEY="sk-xxx" # replace with your own key +export OPENAI_BASE_URL="https://aiproxy.lmzgc.cn:8080/v1" + +export GEMINI_API_KEY="AIzaSyBwE1234567890" # Optional, we provide several other agent base models, such as Claude and LLaMa. + + python browser_env/auto_login.py -python eval_webarena.py --config AgentOccam/configs/AgentOccam.yml \ No newline at end of file +python eval_webarena.py --config AgentOccam/configs/AgentOccam.yml