import base64 import io import json import re from pathlib import Path from typing import Any from PIL import Image from agent.prompts import * from browser_env import ( Action, ActionTypes, ObservationMetadata, StateInfo, action2str, ) HTML_TEMPLATE = """
{body} """ def get_render_action( action: Action, observation_metadata: dict[str, ObservationMetadata], action_set_tag: str, ) -> str: """Parse the predicted actions for rendering purpose. More comprehensive information""" match action_set_tag: case "id_accessibility_tree": text_meta_data = observation_metadata["text"] if action["element_id"] in text_meta_data["obs_nodes_info"]: node_content = text_meta_data["obs_nodes_info"][ action["element_id"] ]["text"] else: node_content = "No match found" action_str = f"{action['raw_prediction']}
{repr(action)}
{action2str(action, action_set_tag, node_content)}
{action['raw_prediction']}
{repr(action)}
{action2str(action, action_set_tag, node_content)}
{_config_str}\n" task_id = _config["task_id"] self.action_set_tag = action_set_tag self.render_file = open( Path(result_dir) / f"render_{task_id}.html", "a+", encoding="utf-8" ) self.render_file.truncate(0) # write init template self.render_file.write(HTML_TEMPLATE.format(body=f"{_config_str}")) self.render_file.read() self.render_file.flush() def render( self, action: Action, state_info: StateInfo, meta_data: dict[str, Any], render_screenshot: bool = False, ) -> None: """Render the trajectory""" # text observation observation = state_info["observation"] text_obs = observation["text"] info = state_info["info"] new_content = f"
{text_obs}