add webrl chat mode

This commit is contained in:
QZH-777 2024-11-14 20:04:38 +08:00
parent 521d7e999a
commit a1a6cbd209
5 changed files with 197 additions and 0 deletions

View File

@ -0,0 +1,13 @@
{
"intro": "# Setup\nYou are a professional web browsing agent assistant that can fulfill user's high-level instructions. Given Simplified html of the browsed webpage at each step, you plan operations in python-style pseudo code using provided functions, or customize functions (if necessary) and then provide their implementations. \n# More details about the code\nYour code should be readable, simple, and only **ONE-LINE-OF-CODE** at a time, avoid using loop statement and only use if-else control if necessary. Predefined functions are as follow:\n\n```\ndef do(action, argument, element):\n\t\"\"\"A single browsing operation on the webpage.\n\tArgs:\n\t\t:param action: one of the actions from [\"Click\", \"Right Click\", \"Type\", \"Search\", \"Hover\", \"Scroll Up\", \"Scroll Down\", \"Press Enter\", \"Switch Tab\", \"Select Dropdown Option\", \"Wait\"].\n\t\t:param argument: optional. Only for \"Type\", \"Search\", \"Switch Page\", and \"Select Dropdown Option\", indicating the content to type in, page number(start from 0) to switch, or key to press.\n\t\t \"Search\" action is equivalent to \"Type\" action plus \"Enter\" key press.\n\t\t:param element: optional. Only for \"Click\", \"Right Click\", \"Type\", \"Search\", \"Select Dropdown Option\", and \"Hover\". Should be specific element id in the html.\n\tReturns:\n\t\tNone. The webpage will be updated after executing the action.\n\t\"\"\"\n\ndef exit(message):\n\t\"\"\"Ending the browsing process if the assistant think it has fulfilled the goal.\n\tArgs:\n\t\t:param message: optional. 
If user's instruction is a question, return assistant's answer in the message based on the browsing content.\n\tReturns:\n\t\tNone.\n\t\"\"\"\n\ndef go_backward():\n\t\"\"\"Go back to the previous page.\n\t\"\"\"\n\ndef go_forward():\n \"\"\"Go forward to the next page.\n \"\"\"\n```\n\nHere are some examples:\n- # Element: the 'REPORTS' section on the left sidebar\ndo(action=\"Click\", element=\"7\")\n- # Element: the 'Period' dropdown, middle center\ndo(action=\"Select Dropdown Option\", argument=\"Month\", element=\"20\")\n- # Element: the 'From' date picker input field, middle center\ndo(action=\"Type\", argument=\"01/01/2023\", element=\"22\")\n- do(action=\"Scroll Down\")\n- exit(message=\"The top-3 best-selling products in January 2023 are: 1\")\n- # Element: The search bar\ndo(action=\"Search\", argument=\"international airport near Carnegie Mellon University within a driving distance of 50 km\", element=\"13\")\n- # Note: Pittsburgh International Airport, Southern Beltway, Findlay Township, Allegheny County, 15231, United States\n# Element: The field labeled 'Pittsburgh International Airport' in the top left corner\ndo(action=\"Type\", argument=\"Cleveland Hopkins International Airport\", element=\"14\")\n\nREMEMBER: \n- only **ONE-LINE-OF-CODE** at a time\n- Don't generate an operation element that you do not see in the screenshot.\n- Use \"# Element\" to describe the element you choose in the html.\n- Use '# Note\" to record information useful to answer the instruction if needed.\n- If you find yourself fallen into some sort of loop, try to use another method or change your action.\n- If you think a page is still loading or still playing animation and you want to wait a while, use \"Wait\" action.\n- You are acting in a real world, try your best not to reject user's demand. 
Solve all the problem you encounter.\n- If you think you didn't get expected webpage, you should try using more precise and locative description of the element.\n- You must make sure the target element of `find_element*` exists on current screenshot, if not, you should navigate to the target place first.\n- You must identify potential errors or mistakes made by `find_element*` function and correct them. If the webpage is not as expected, you should try to re-do or un-do the operation.\n- You should **NEVER** try to use the browser's address bar at the top of the page to navigate.\n- Your answer shouldn't be in a code snippet format. Just write the function name and its arguments.\n- For quote, exit, go_backward, go_forward request, you should strictly obey the format of quote, exit, go_backward, go_forward functions, answers like do(\"Quote\", xxx, None) or do(\"quote\", xxx, None)are not allowed.\n- If you use do function to perform \"Click\", \"Right Click\", \"Type\", \"Search\", \"Select Dropdown Option\", and \"Hover\", the param element must not be None.\n",
"examples": [],
"template": "",
"meta_data": {
"observation": "webrl",
"action_type": "webrl_id",
"keywords": [],
"prompt_constructor": "WebRLChatPromptConstructor",
"answer_phrase": "",
"action_splitter": ""
}
}

View File

@ -553,5 +553,65 @@ class WebRLPromptConstructor(PromptConstructor):
return prompt return prompt
def extract_action(self, response: str) -> str:
    """Return ``response`` unchanged; no parsing or splitting is applied."""
    return response
class WebRLChatPromptConstructor(PromptConstructor):
    """Build a chat-style (list of role/content messages) prompt.

    The agent directly predicts the action: the model response is returned
    as-is by :meth:`extract_action`.
    """

    def __init__(
        self,
        instruction_path: str | Path,
        lm_config: lm_config.LMConfig,
        tokenizer: Tokenizer,
    ):
        super().__init__(instruction_path, lm_config, tokenizer)

    def _truncate_observation(self, obs: str) -> str:
        """Cut ``obs`` down to ``gen_config["max_obs_length"]`` if that is set.

        The limit is counted in tokens when a tokenizer can be used, and in
        characters for the "google" provider or when tokenizing fails.
        A falsy limit (``0``/``None``) disables truncation.
        """
        max_obs_length = self.lm_config.gen_config["max_obs_length"]
        if not max_obs_length:
            return obs

        if self.lm_config.provider == "google":
            print("NOTE: This is a Gemini model, so we use characters instead of tokens for max_obs_length.")
            return obs[:max_obs_length]

        try:
            return self.tokenizer.decode(self.tokenizer.encode(obs)[:max_obs_length])  # type: ignore[arg-type]
        except Exception:
            # Best-effort fallback (e.g. no usable tokenizer). ``Exception``
            # rather than a bare ``except`` so Ctrl-C / SystemExit propagate.
            print("NOTE: There is no available tokenizer, so we use characters instead of tokens for max_obs_length.")
            return obs[:max_obs_length]

    def construct(
        self,
        trajectory: Trajectory,
        intent: str,
        meta_data: dict[str, Any] | None = None,
    ) -> APIInput:
        """Construct the chat prompt given the trajectory.

        Args:
            trajectory: Episode so far; only the last element is read, and it
                must carry ``["observation"][self.obs_modality]``.
            intent: The task instruction.
            meta_data: Must contain ``"action_history"``. Its first entry is
                skipped; the remaining entries are replayed as assistant turns.

        Returns:
            A list of ``{"role", "content"}`` messages: the system intro, one
            user/assistant pair per past round, then the current observation.

        Raises:
            KeyError: If ``meta_data`` has no ``"action_history"`` entry.
        """
        if meta_data is None:
            meta_data = {}

        state_info: StateInfo = trajectory[-1]  # type: ignore[assignment]
        obs = self._truncate_observation(state_info["observation"][self.obs_modality])

        action_history = meta_data["action_history"]
        # Slicing an one-element history yields [], so no special case is needed.
        previous_actions = action_history[1:]
        index = len(action_history) - 1

        conversations = [{'role': 'system', 'content': self.instruction['intro']}]
        for i in range(index):
            if i == 0:
                content_user = f"Task Instruction: {intent}\n\nRound {i}\n{intent}"
            else:
                # Past observations are not replayed; a fixed placeholder stands in.
                content_user = f"Round {i}\n** Simplified html **"
            conversations.append({'role': 'user', 'content': content_user})
            conversations.append({'role': 'assistant', 'content': f"{previous_actions[i]}"})

        # NOTE(review): when index == 0 there are no past rounds, so ``intent``
        # appears in no message at all. Confirm that this is the format the
        # model expects for its first step.
        conversations.append({'role': 'user', 'content': f'Round {index}\n\n{obs}'})
        return conversations

    def extract_action(self, response: str) -> str:
        """Return ``response`` unchanged; no parsing or splitting is applied."""
        return response

View File

@ -84,3 +84,28 @@ for src in file_list:
if len(file_list) > 1: if len(file_list) > 1:
get_result(all_result) get_result(all_result)
export_result(all_result, show_all=True) export_result(all_result, show_all=True)
# Per-site breakdown of the results, grouped by each task's ``sites`` tuple.
with open('./config_files/wa/test_webarena_lite.raw.json') as config_file:
    configs = json.load(config_file)

sub_results = {}  # sites tuple -> one score per configured task (0 if no result)
sub_ids = {}      # sites tuple -> old_task_id of every task whose score is 1
for entry in configs:
    web = tuple(entry['sites'])
    task_id = int(entry['task_id'])
    old_task_id = int(entry['old_task_id'])

    scores = sub_results.setdefault(web, [])
    solved_ids = sub_ids.setdefault(web, [])

    if task_id not in all_result:
        # A task without a recorded result counts as a failure.
        scores.append(0)
        continue

    scores.append(all_result[task_id])
    if all_result[task_id] == 1:
        solved_ids.append(old_task_id)

# Success rate per site group, as a percentage with one decimal.
for web, scores in sub_results.items():
    print(web, round(sum(scores) / len(scores) * 100, 1))

print('\n\n')

# Solved task ids (original numbering) per site group, and how many there are.
for web, solved_ids in sub_ids.items():
    print(web, sorted(solved_ids), len(solved_ids))

View File

@ -0,0 +1,97 @@
#!/bin/bash
# Configuration for the parallel WebRL chat-mode run.
# Fill in every value marked TODO before launching; the values below are
# interpolated into the command line and environment of each job.
DATASET='webarena' # TODO: select from ['webarena', 'visualwebarena']
result_dir='' # TODO: set your result_dir
provider='openai' # TODO: select from ['openai', 'finetune', ...]
model='' # TODO: assign model name, which is used for action generation
planner_ip='' # TODO: ip address of the model you are deploying (only if you are deploying your own model using e.g. vllm)
instruction_path='agent/prompts/jsons/p_webrl_chat.json' # e.g., agent/prompts/jsons/p_cot_id_actree_2s.json
test_config_base_dir='config_files/wa/test_webarena_lite' # e.g., config_files/wa/test_webarena_lite
temperature=0.0
SERVER='' # TODO: your server address
MAP_SERVER='' # TODO: the server address for MAP tasks
OPENAI_API_KEY='' # TODO: if you test OpenAI APIs
OPENAI_ORGANIZATION=''
CONDA_ENV_NAME='' # TODO: the name of your conda environment for testing WebArena
# Semicolon-separated `export` statements; this string is typed into each tmux
# pane ahead of the job command, so the site URLs and API credentials are set
# in that pane's shell.
ENV_VARIABLES="export DATASET=${DATASET}; export SHOPPING='http://${SERVER}:7770';export SHOPPING_ADMIN='http://${SERVER}:7780/admin';export REDDIT='http://${SERVER}:9999';export GITLAB='http://${SERVER}:8023';export MAP='http://${MAP_SERVER}:3000';export WIKIPEDIA='http://${SERVER}:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing';export HOMEPAGE='http://${SERVER}:4399';export OPENAI_API_KEY=${OPENAI_API_KEY};export OPENAI_ORGANIZATION=${OPENAI_ORGANIZATION}"
# Make sure the current tmux window has 7 panes, splitting it as needed.
# Count the panes that already exist ...
num_panes=$(tmux list-panes | wc -l)
# ... and work out how many are still missing.
panes_to_create=$((7 - num_panes))

# Split commands, applied in order: entry k performs the (k+1)-th split.
tmux_commands=(
    'tmux split-window -h'
    'tmux split-window -v'
    'tmux select-pane -t 0; tmux split-window -v'
    'tmux split-window -v'
    'tmux select-pane -t 3; tmux split-window -v'
    'tmux select-pane -t 5; tmux split-window -v'
)

# Run only as many splits as there are panes missing.
for ((i = 0; i < panes_to_create; i++)); do
    eval "${tmux_commands[i]}"
done
# Run one evaluation job inside an existing tmux pane.
#   $1 - index of the tmux pane to select and type into
#   $2 - value passed to --test_start_idx
#   $3 - value passed to --test_end_idx
# All other options come from the configuration variables set at the top of
# this script.
run_job() {
tmux select-pane -t $1
# The trailing backslashes join the following lines into one single-line
# command string; the stop token keeps literal double quotes around it.
COMMAND="python run.py \
--instruction_path ${instruction_path} \
--test_start_idx $2 \
--test_end_idx $3 \
--result_dir ${result_dir} \
--test_config_base_dir ${test_config_base_dir} \
--provider ${provider} \
--model ${model} \
--mode chat \
--planner_ip ${planner_ip} \
--stop_token \"<|eot_id|>\" \
--temperature ${temperature} \
--max_obs_length 0 \
--max_tokens 2048 \
--viewport_width 1280 \
--viewport_height 720 \
--parsing_failure_th 5 \
--repeating_action_failure_th 5 \
--action_set_tag webrl_id --observation_type webrl"
# Type the whole pipeline into the selected pane and press Enter (C-m):
# activate the conda env, export the environment, then re-run the command
# (after a 1 second pause) every time it exits with a non-zero status.
tmux send-keys "tmux set mouse on; conda activate ${CONDA_ENV_NAME}; ${ENV_VARIABLES}; until ${COMMAND}; do echo 'crashed' >&2; sleep 1; done" C-m
# Short pause before returning, presumably to stagger job start-up.
sleep 3
}
# Passed as --tolerance to scripts/check_error_runs.py.
TOLERANCE=2

# Start one job per consecutive pair of boundaries.
# For boundaries b0 b1 ... bN, pane i (1..N) is given start=b(i-1), end=b(i).
_launch_jobs() {
    local bounds=("$@")
    local pane
    for ((pane = 1; pane < ${#bounds[@]}; pane++)); do
        run_job "$pane" "${bounds[pane-1]}" "${bounds[pane]}"
    done
}

# Block until no pane of the current tmux window reports a command matching "python".
_wait_for_jobs() {
    while tmux list-panes -F "#{pane_pid} #{pane_current_command}" | grep -q python; do
        sleep 100 # poll every 100 seconds
    done
}

# Run all jobs for the given index boundaries, then keep re-running them
# until the error checker exits successfully.
#   $@ - ascending list of test indices; N+1 boundaries produce N jobs,
#        which are sent to panes 1..N.
run_batch() {
    _launch_jobs "$@"
    _wait_for_jobs

    # The checker is invoked with --delete_errors; a non-zero exit status
    # triggers another full launch-and-wait cycle.
    while ! python scripts/check_error_runs.py "${result_dir}" --delete_errors --tolerance "${TOLERANCE}"; do
        echo "Check failed, rerunning jobs..."
        _launch_jobs "$@"
        _wait_for_jobs
    done
}

run_batch 0 28 56 84 112 140 165

View File

@ -15,6 +15,7 @@ cp -f new/run.py visualwebarena/run.py
cp -f new/agent.py visualwebarena/agent/agent.py cp -f new/agent.py visualwebarena/agent/agent.py
cp -f new/prompt_constructor.py visualwebarena/agent/prompts/prompt_constructor.py cp -f new/prompt_constructor.py visualwebarena/agent/prompts/prompt_constructor.py
cp -f new/p_webrl.json visualwebarena/agent/prompts/jsons/p_webrl.json cp -f new/p_webrl.json visualwebarena/agent/prompts/jsons/p_webrl.json
cp -f new/p_webrl_chat.json visualwebarena/agent/prompts/jsons/p_webrl_chat.json
# browser_env # browser_env
cp -f new/actions.py visualwebarena/browser_env/actions.py cp -f new/actions.py visualwebarena/browser_env/actions.py
@ -42,6 +43,7 @@ cp -f new/wa_parallel_run.sh visualwebarena/wa_parallel_run.sh
cp -f new/score.py visualwebarena/score.py cp -f new/score.py visualwebarena/score.py
cp -f new/wa_parallel_run_webrl.sh visualwebarena/wa_parallel_run_webrl.sh cp -f new/wa_parallel_run_webrl.sh visualwebarena/wa_parallel_run_webrl.sh
cp -f new/wa_parallel_run_webrl_chat.sh visualwebarena/wa_parallel_run_webrl_chat.sh
# 3. remove temporary files # 3. remove temporary files
mv visualwebarena/* . mv visualwebarena/* .