# AgentOccam/webagents_step/agents/step_agent.py

from webagents_step.agents.agent import Agent
from webagents_step.utils.stack import Stack
from webagents_step.agents.prompt_agent import PromptAgent

from typing import List, Dict
import re
import time

class StepAgent(Agent):
    """Agent that resolves an objective through a stack of ``PromptAgent``s.

    The bottom of the stack is a root ``PromptAgent`` built from the prompt
    registered under ``root_action``. Whenever the agent on top of the stack
    emits a high-level action (one whose first token is a key of
    ``action_to_prompt_dict``), a new ``PromptAgent`` for that action is pushed.
    When the top agent emits a stop action it is popped and the bracketed
    argument of the stop action is handed to the agent below it. Low-level
    actions (first token in ``low_level_action_list``) are returned to the
    caller.
    """

    # Action strings are expected to look like ``name [argument]``.
    _TYPED_ACTION_RE = re.compile(r'(\w+)\s+\[(.*?)\]')
    _BRACKET_ARG_RE = re.compile(r"\[(.*?)\]")

    def __init__(self, max_actions: int = 10, verbose: bool = False, logging: bool = False,
                 debug: bool = False,
                 root_action: str = None,
                 action_to_prompt_dict: Dict = None,
                 low_level_action_list: List = None,
                 model: str = "gpt-3.5-turbo",
                 prompt_mode: str = "chat", previous_actions: List = None):
        """Store the configuration and start with an empty agent stack.

        Args:
            max_actions: Forwarded to the base class and to every
                ``PromptAgent`` created by this agent.
            verbose: Forwarded to the base class and to every ``PromptAgent``.
            logging: Forwarded to the base class and to every ``PromptAgent``;
                also enables ``log_step`` calls in ``predict_action``.
            debug: Forwarded to every ``PromptAgent``.
            root_action: Key of ``action_to_prompt_dict`` whose prompt is used
                for the root agent.
            action_to_prompt_dict: Maps a high-level action name to the prompt
                template of the agent that handles it. Defaults to ``{}``.
            low_level_action_list: Action names that are returned to the
                caller instead of being expanded. Defaults to ``[]``.
            model: Model name forwarded to every ``PromptAgent``.
            prompt_mode: Prompt mode forwarded to every ``PromptAgent``.
            previous_actions: Forwarded to the base class.
        """
        super().__init__(max_actions=max_actions, verbose=verbose, logging=logging, previous_actions=previous_actions)
        self.debug = debug
        self.root_action = root_action
        # Fresh containers per instance so that instances never share state.
        self.action_to_prompt_dict = {} if action_to_prompt_dict is None else action_to_prompt_dict
        self.low_level_action_list = [] if low_level_action_list is None else low_level_action_list
        self.model = model
        self.prompt_mode = prompt_mode
        self.stack = Stack()

    @staticmethod
    def _action_type(action):
        """Return the first whitespace-delimited token of ``action``.

        Returns ``None`` when ``action`` is empty, ``None`` or contains only
        whitespace, so callers never index into an empty token list.
        """
        tokens = action.split() if action else []
        return tokens[0] if tokens else None

    def _make_element(self, prompt_template, objective):
        """Build a stack element: a fresh ``PromptAgent`` plus its objective."""
        agent = PromptAgent(
            prompt_template=prompt_template,
            model=self.model,
            prompt_mode=self.prompt_mode,
            max_actions=self.max_actions,
            verbose=self.verbose,
            logging=self.logging,
            debug=self.debug,
            previous_actions=[],
            previous_reasons=[],
            previous_responses=[]
        )
        return {'agent': agent, 'objective': objective}

    def is_done(self, action):
        """Return True when ``action`` is non-empty and contains ``"stop"``.

        NOTE(review): this is a substring test, so any action whose text
        happens to contain "stop" (for example inside a typed argument) is
        also treated as a stop action -- confirm this is intended.
        """
        return bool(action and "stop" in action)

    def is_low_level_action(self, action):
        """Return True when the first token of ``action`` is a low-level action."""
        action_type = self._action_type(action)
        return action_type is not None and action_type in self.low_level_action_list

    def is_high_level_action(self, action):
        """Return True when the first token of ``action`` has a registered prompt."""
        action_type = self._action_type(action)
        return action_type is not None and action_type in self.action_to_prompt_dict

    def init_root_agent(self, objective):
        """Create the stack element for the root agent.

        Raises:
            KeyError: If ``root_action`` is not a key of
                ``action_to_prompt_dict``.
        """
        root_prompt_template = self.action_to_prompt_dict[self.root_action]
        return self._make_element(root_prompt_template, objective)

    def init_agent(self, action):
        """Create the stack element for the sub-agent handling ``action``.

        The full action string becomes the sub-agent's objective.

        Raises:
            KeyError: If no prompt is registered for the action's type.
        """
        match = self._TYPED_ACTION_RE.search(action)
        action_type = match.group(1) if match else None
        if action_type not in self.action_to_prompt_dict:
            # The pattern may miss (no " [..]" part) or capture a different
            # word than the one ``is_high_level_action`` checked; fall back to
            # the first token so both methods agree on the action type.
            action_type = self._action_type(action)
        prompt_template = self.action_to_prompt_dict[action_type]
        return self._make_element(prompt_template, action)

    def predict_action(self, objective, observation, url=None):
        """Query the agent stack until a low-level action is produced.

        Args:
            objective: Objective given to the root agent when the stack is
                empty on entry.
            observation: Either the observation itself or a dict whose
                ``"text"`` entry is the observation.
            url: Forwarded to the sub-agents and to ``log_step``.

        Returns:
            A tuple ``(action, reason)``: the first low-level action emitted,
            or, when the stack runs empty, the last action/reason produced
            (the stop action of the root agent).
        """
        if isinstance(observation, dict):
            observation = observation["text"]

        if self.stack.is_empty():
            self.stack.push(self.init_root_agent(objective=objective))

        action, reason = None, None
        # NOTE(review): an action that is neither a stop, a low-level nor a
        # high-level action leaves the stack untouched, so the same agent is
        # simply queried again on the next iteration, without any bound.
        while not self.stack.is_empty():
            element = self.stack.peek()
            action, reason = element['agent'].predict_action(
                objective=element['objective'], observation=observation, url=url)
            done = self.is_done(action)

            if not done and self.is_low_level_action(action):
                element['agent'].receive_response("")
                return action, reason

            if not done and self.is_high_level_action(action):
                # Delegate: the new sub-agent is queried on the next iteration.
                self.stack.push(self.init_agent(action))
            elif done:
                self.stack.pop()
                if not self.stack.is_empty():
                    # Hand the sub-agent's answer to its parent. A stop action
                    # without a bracketed argument yields an empty response
                    # instead of raising on a failed regex match.
                    answer = self._BRACKET_ARG_RE.search(action)
                    self.stack.peek()['agent'].receive_response(answer.group(1) if answer else "")
            else:
                continue

            if self.logging:
                self.log_step(objective=element['objective'], url=url, observation=observation, action=action, reason=reason, status={})
        return action, reason