Merge pull request #12 from QZH-777/main

update VAB-WebArena-Lite
2024-11-24 11:28:58 +08:00 · 2024-11-24 11:28:58 +08:00 · dcfbd4348e
commit dcfbd4348e
parent d838c370f3 42fc28a915
1 changed file with 22 additions and 0 deletions
--- a/VAB-WebArena-Lite/new/run.py
+++ b/VAB-WebArena-Lite/new/run.py
@@ -506,6 +506,28 @@ def test(
                    trajectory.append(create_stop_action(""))
                    break
            # save trajectory
            if args.observation_type == 'webrl':
                current_path = os.path.join(args.result_dir, 'traces', f"{task_id}.jsonl")
                traces = []
                for i in range(1, len(trajectory), 2):
                    action = trajectory[i]
                    state_info = trajectory[i - 1]
                    obs = state_info["observation"]['text']
                    action_str = action['raw_prediction']
                    item = {
                        'trace_id': task_id,
                        'index': i // 2,
                        'prompt': intent if i == 1 else '** Simplified html **',
                        'html': obs,
                        'response': action_str,
                        'target': intent 
                    }
                    traces.append(item)
                with open(current_path, 'w') as f:
                    for item in traces:
                        f.write(json.dumps(item) + '\n')
            # NOTE: eval_caption_image_fn is used for running eval_vqa functions.
            evaluator = evaluator_router(
                config_file, captioning_fn=eval_caption_image_fn