AgentOccam/browser_env/utils.py
2025-01-22 11:32:35 -08:00

81 lines
1.6 KiB
Python

from dataclasses import dataclass
from io import BytesIO
from typing import Any, Dict, TypedDict, Union
import numpy as np
import numpy.typing as npt
from PIL import Image
@dataclass
class DetachedPage:
    """Plain record of a page: its URL and its HTML content, both as strings."""

    url: str
    content: str  # html
def png_bytes_to_numpy(png: bytes) -> npt.NDArray[np.uint8]:
    """Decode encoded image bytes (PNG expected) into a NumPy array.

    Args:
        png: Raw bytes of the encoded image.

    Returns:
        The result of ``np.array`` applied to the Pillow image opened from
        ``png``.
        NOTE(review): the annotation promises ``uint8``, but nothing here
        enforces it; shape and dtype presumably follow the decoded image's
        mode -- confirm callers only pass 8-bit images.

    Example (``go`` and ``plt`` are not among this module's imports):
        >>> fig = go.Figure(go.Scatter(x=[1], y=[1]))
        >>> plt.imshow(png_bytes_to_numpy(fig.to_image('png')))
    """
    # Convert inside the ``with`` block so the pixel data is read before the
    # image object is closed.
    with Image.open(BytesIO(png)) as image:
        return np.array(image)
class AccessibilityTreeNode(TypedDict):
nodeId: str
ignored: bool
role: dict[str, Any]
chromeRole: dict[str, Any]
name: dict[str, Any]
properties: list[dict[str, Any]]
childIds: list[str]
parentId: str
backendDOMNodeId: str
frameId: str
bound: list[float] | None
union_bound: list[float] | None
offsetrect_bound: list[float] | None
class DOMNode(TypedDict):
nodeId: str
nodeType: str
nodeName: str
nodeValue: str
attributes: str
backendNodeId: str
parentId: str
childIds: list[str]
cursor: int
union_bound: list[float] | None
class BrowserConfig(TypedDict):
    """Typed shape of the browser window configuration dictionary.

    All keys are required and every value is a float.
    NOTE(review): the ``win_*`` keys presumably describe the window/viewport
    rectangle -- confirm units and origin against the producer.
    """

    win_top_bound: float
    win_left_bound: float
    win_width: float
    win_height: float
    win_right_bound: float
    win_lower_bound: float
    device_pixel_ratio: float
class BrowserInfo(TypedDict):
    """Typed shape of the browser-info dictionary.

    Keys:
        DOMTree: String-keyed mapping with arbitrary values.
        config: A ``BrowserConfig`` dictionary.
    """

    DOMTree: dict[str, Any]
    config: BrowserConfig
# An accessibility tree is a list of AccessibilityTreeNode dictionaries.
AccessibilityTree = list[AccessibilityTreeNode]
# A DOM tree is a list of DOMNode dictionaries.
DOMTree = list[DOMNode]
# An observation is either a string or a NumPy array annotated as uint8.
Observation = str | npt.NDArray[np.uint8]
class StateInfo(TypedDict):
    """Typed shape of the state dictionary: observations plus extra info.

    Keys:
        observation: String-keyed mapping whose values are ``Observation``
            objects (a string or a NumPy array).
        info: String-keyed mapping with arbitrary values.
    """

    observation: dict[str, Observation]
    # Builtin ``dict`` generic, matching every other annotation in this file.
    info: dict[str, Any]