Update VAB-WebArena-Lite parameters
This commit is contained in:
parent
0c6f549214
commit
cdacb9ab21
|
@ -72,7 +72,7 @@ For VAB-WebArena-Lite, it is based on [WebArena](https://github.com/webarena-x/w
|
||||||
* [VAB-OmniGibson Setup](docs/detailed_setups/VAB-OmniGibson.md)
|
* [VAB-OmniGibson Setup](docs/detailed_setups/VAB-OmniGibson.md)
|
||||||
* [VAB-Minecraft Setup](docs/detailed_setups/VAB-Minecraft.md)
|
* [VAB-Minecraft Setup](docs/detailed_setups/VAB-Minecraft.md)
|
||||||
* VAB-Mobile: Ongoing
|
* VAB-Mobile: Ongoing
|
||||||
* [VAB-WebArena-Lite Setup](VAB-WebArena-Lite/README.md) (Separate installation and evaluation method)
|
* [VAB-WebArena-Lite Setup](VAB-WebArena-Lite) (Separate installation and evaluation method)
|
||||||
* VAB-CSS: Ongoing
|
* VAB-CSS: Ongoing
|
||||||
|
|
||||||
### Step 2. Configure the Agent
|
### Step 2. Configure the Agent
|
||||||
|
|
|
@ -147,7 +147,7 @@ python score.py <your_result_dir>
|
||||||
|
|
||||||
### 👍 Run Parallel Agent For Evaluation (Recommended)
|
### 👍 Run Parallel Agent For Evaluation (Recommended)
|
||||||
|
|
||||||
To run the tests in parallel, first configure `wa_parallel_run.sh`, then run it. By default, we split the test set into 5 parallel groups for evaluation in VAB.
|
To run the tests in parallel, first configure `wa_parallel_run.sh`, then run it. By default, we split the test set into 8 parallel groups for evaluation in VAB.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Remember to first launch a tmux session
|
# Remember to first launch a tmux session
|
||||||
|
|
|
@ -1,17 +1,17 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
DATASET='webarena' # webarena, visualwebarena
|
DATASET='webarena' # TODO: select from ['webarena', 'visualwebarena']
|
||||||
result_dir=''
|
result_dir='' # TODO: set your result_dir
|
||||||
provider=''
|
provider='' # TODO: select from ['openai', 'finetune', ...]
|
||||||
model=''
|
model='' # TODO: assign model name. If `provider == finetune`, choose `finetuned`
|
||||||
instruction_path='agent/prompts/jsons/p_som_cot_id_actree_3s.json' # e.g., agent/prompts/jsons/p_cot_id_actree_2s.json
|
instruction_path='agent/prompts/jsons/p_som_cot_id_actree_3s.json' # e.g., agent/prompts/jsons/p_cot_id_actree_2s.json
|
||||||
test_config_base_dir='config_files/wa/test_webarena_lite' # e.g., config_files/wa/test_webarena_lite
|
test_config_base_dir='config_files/wa/test_webarena_lite' # e.g., config_files/wa/test_webarena_lite
|
||||||
temperature=0.0
|
temperature=0.0
|
||||||
|
|
||||||
SERVER='' # your server address
|
SERVER='' # TODO: your server address
|
||||||
MAP_SERVER='' # the same as SERVER
|
MAP_SERVER='' # TODO: the server address for MAP tasks
|
||||||
OPENAI_API_KEY=''
|
OPENAI_API_KEY='' # TODO: if you test OpenAI APIs
|
||||||
OPENAI_ORGANIZATION=''
|
OPENAI_ORGANIZATION=''
|
||||||
CONDA_ENV_NAME='' # the name of your conda environment
|
CONDA_ENV_NAME='' # TODO: the name of your conda environment for testing WebArena
|
||||||
|
|
||||||
ENV_VARIABLES="export DATASET=${DATASET}; export SHOPPING='http://${SERVER}:7770';export SHOPPING_ADMIN='http://${SERVER}:7780/admin';export REDDIT='http://${SERVER}:9999';export GITLAB='http://${SERVER}:8023';export MAP='http://${MAP_SERVER}:3000';export WIKIPEDIA='http://${SERVER}:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing';export HOMEPAGE='http://${SERVER}:4399';export OPENAI_API_KEY=${OPENAI_API_KEY};export OPENAI_ORGANIZATION=${OPENAI_ORGANIZATION}"
|
ENV_VARIABLES="export DATASET=${DATASET}; export SHOPPING='http://${SERVER}:7770';export SHOPPING_ADMIN='http://${SERVER}:7780/admin';export REDDIT='http://${SERVER}:9999';export GITLAB='http://${SERVER}:8023';export MAP='http://${MAP_SERVER}:3000';export WIKIPEDIA='http://${SERVER}:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing';export HOMEPAGE='http://${SERVER}:4399';export OPENAI_API_KEY=${OPENAI_API_KEY};export OPENAI_ORGANIZATION=${OPENAI_ORGANIZATION}"
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@ done
|
||||||
# Function to run a job
|
# Function to run a job
|
||||||
run_job() {
|
run_job() {
|
||||||
tmux select-pane -t $1
|
tmux select-pane -t $1
|
||||||
tmux send-keys "tmux set mouse on; conda activate ${CONDA_ENV_NAME}; ${ENV_VARIABLES}; until python run.py --viewport_width 1280 --viewport_height 720 --test_start_idx $2 --test_end_idx $3 --provider ${provider} --model ${model} --instruction_path ${instruction_path} --temperature ${temperature} --test_config_base_dir ${test_config_base_dir} --result_dir ${result_dir}; do echo 'crashed' >&2; sleep 1; done" C-m
|
tmux send-keys "tmux set mouse on; conda activate ${CONDA_ENV_NAME}; ${ENV_VARIABLES}; until python run.py --viewport_width 1280 --viewport_height 720 --test_start_idx $2 --test_end_idx $3 --provider ${provider} --model ${model} --instruction_path ${instruction_path} --temperature ${temperature} --test_config_base_dir ${test_config_base_dir} --result_dir ${result_dir} --action_set_tag som --observation_type image_som; do echo 'crashed' >&2; sleep 1; done" C-m
|
||||||
sleep 3
|
sleep 3
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user