453 lines
19 KiB
Python
453 lines
19 KiB
Python
import asyncio
|
|
import os
|
|
import tempfile
|
|
import time
|
|
|
|
from playwright.async_api import async_playwright as playwright
|
|
from playwright.async_api import BrowserContext
|
|
from playwright.async_api import Page
|
|
from playwright.async_api import Playwright
|
|
|
|
from Agent_E.ae.core.notification_manager import NotificationManager
|
|
from Agent_E.ae.core.ui_manager import UIManager
|
|
from Agent_E.ae.utils.dom_mutation_observer import dom_mutation_change_detected
|
|
from Agent_E.ae.utils.dom_mutation_observer import handle_navigation_for_mutation_observer
|
|
from Agent_E.ae.utils.js_helper import beautify_plan_message
|
|
from Agent_E.ae.utils.js_helper import escape_js_message
|
|
from Agent_E.ae.utils.logger import logger
|
|
from Agent_E.ae.utils.ui_messagetype import MessageType
|
|
|
|
# Ensures that Playwright does not wait for font loading when taking screenshots. Reference: https://github.com/microsoft/playwright/issues/28995
|
|
os.environ["PW_TEST_SCREENSHOT_NO_FONTS_READY"] = "1"
|
|
|
|
class PlaywrightManager:
    """
    A singleton class to manage the Playwright instance and its browser context.

    Attributes:
        browser_type (str): The type of browser to use. Only 'chromium' is supported by
            create_browser_context; any other value makes it raise ValueError.
        isheadless (bool): Flag to launch the browser in headless mode or not.
        notification_manager (NotificationManager): Receives every message passed to notify_user
            that is actually shown.
        user_response_event (asyncio.Event): Set when the page calls the exposed 'user_response' function.
        ui_manager (UIManager): Only created when gui_input_mode is True. The overlay, navigation and
            logging methods of this class use it unconditionally.

    The class ensures only one instance of itself, Playwright, and the browser context is created
    during the application lifecycle. Playwright and the browser context are stored on the class.
    """

    _homepage = "https://www.google.com"
    _instance = None
    _playwright = None  # type: ignore
    _browser_context = None
    __async_initialize_done = False
    _take_screenshots = False
    _screenshots_dir = None

    def __new__(cls, *args, **kwargs):  # type: ignore
        """
        Ensures that only one instance of PlaywrightManager is created (singleton pattern).
        """
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # __init__ runs on every PlaywrightManager(...) call; this flag lets it skip re-initialization.
            cls._instance.__initialized = False
            logger.debug("Playwright instance created..")
        return cls._instance

    def __init__(self, browser_type: str = "chromium", headless: bool = False, gui_input_mode: bool = True, screenshots_dir: str = "", take_screenshots: bool = False):
        """
        Initializes the PlaywrightManager with the specified browser type and headless mode.
        Initialization occurs only once due to the singleton pattern; arguments passed to later
        constructions are ignored.

        Args:
            browser_type (str, optional): The type of browser to use. Defaults to "chromium".
            headless (bool, optional): Flag to launch the browser in headless mode or not. Defaults to False (non-headless).
            gui_input_mode (bool, optional): When True, a UIManager is created and stored as ui_manager. Defaults to True.
            screenshots_dir (str, optional): Directory in which take_screenshots saves its files. Defaults to "".
            take_screenshots (bool, optional): Whether take_screenshots actually captures anything. Defaults to False.
        """
        if self.__initialized:
            return
        self.browser_type = browser_type
        self.isheadless = headless
        self.__initialized = True
        self.notification_manager = NotificationManager()
        self.user_response_event = asyncio.Event()
        if gui_input_mode:
            self.ui_manager: UIManager = UIManager()

        self.set_take_screenshots(take_screenshots)
        self.set_screenshots_dir(screenshots_dir)

    async def async_initialize(self):
        """
        Asynchronously initialize necessary components and handlers for the browser context.

        Does nothing if a previous call already completed (and stop_playwright has not been called since).
        """
        if self.__async_initialize_done:
            return

        # Step 1: Ensure Playwright is started and browser context is created
        await self.start_playwright()
        await self.ensure_browser_context()

        # Step 2: Deferred setup of handlers
        await self.setup_handlers()

        # Step 3: Navigate to homepage
        await self.go_to_homepage()

        self.__async_initialize_done = True

    async def ensure_browser_context(self):
        """
        Ensure that a browser context exists, creating it if necessary.
        """
        if self._browser_context is None:
            await self.create_browser_context()

    async def setup_handlers(self):
        """
        Setup various handlers after the browser context has been ensured.
        """
        await self.set_overlay_state_handler()
        await self.set_user_response_handler()
        await self.set_navigation_handler()

    async def start_playwright(self):
        """
        Starts the Playwright instance if it hasn't been started yet. This method is idempotent.
        """
        if not PlaywrightManager._playwright:
            PlaywrightManager._playwright = await playwright().start()

    async def stop_playwright(self):
        """
        Stops the Playwright instance and resets it to None. This method should be called to clean up resources.

        The browser context is closed first. The async-initialization flag is reset as well so that a
        later async_initialize() call performs a full start-up again instead of returning early.
        """
        # Close the browser context if it's initialized
        if PlaywrightManager._browser_context is not None:
            await PlaywrightManager._browser_context.close()
            PlaywrightManager._browser_context = None

        # Stop the Playwright instance if it's initialized
        if PlaywrightManager._playwright is not None:  # type: ignore
            await PlaywrightManager._playwright.stop()
            PlaywrightManager._playwright = None  # type: ignore

        self.__async_initialize_done = False

    async def _launch_persistent_chrome(self, user_data_dir: str) -> BrowserContext:
        """
        Launch the 'chrome' channel with a persistent context rooted at user_data_dir and return the context.
        """
        return await PlaywrightManager._playwright.chromium.launch_persistent_context(  # type: ignore
            user_data_dir,
            channel="chrome",
            headless=self.isheadless,
            args=[
                "--disable-blink-features=AutomationControlled",
                "--disable-session-crashed-bubble",  # disable the restore session bubble
                "--disable-infobars",  # disable informational popups,
            ],
            no_viewport=True,
        )

    async def create_browser_context(self):
        """
        Create the persistent browser context and store it on the class.

        The user data directory is read from the BROWSER_STORAGE_DIR environment variable (empty string
        when unset). If the launch fails because the target was closed, a second launch is attempted
        with a fresh temporary directory.

        Raises:
            ValueError: If browser_type is not "chromium", or if Chrome is not installed.
            Exception: Any other launch failure is re-raised unchanged.
        """
        if self.browser_type != "chromium":
            raise ValueError(f"Unsupported browser type: {self.browser_type}")

        # The context may be (re)created lazily, e.g. after stop_playwright(); make sure Playwright is running.
        await self.start_playwright()

        user_dir: str = os.environ.get('BROWSER_STORAGE_DIR', '')
        logger.info(f"User dir: {user_dir}")
        try:
            PlaywrightManager._browser_context = await self._launch_persistent_chrome(user_dir)
        except Exception as e:
            if "Target page, context or browser has been closed" in str(e):
                new_user_dir = tempfile.mkdtemp()
                logger.error(f"Failed to launch persistent context with user dir {user_dir}: {e} Trying to launch with a new user dir {new_user_dir}")
                PlaywrightManager._browser_context = await self._launch_persistent_chrome(new_user_dir)
            elif "Chromium distribution 'chrome' is not found " in str(e):
                raise ValueError("Chrome is not installed on this device. Install Google Chrome or install playwright using 'playwright install chrome'. Refer to the readme for more information.") from None
            else:
                raise

    async def get_browser_context(self):
        """
        Returns the existing browser context, or creates a new one if it doesn't exist.
        """
        await self.ensure_browser_context()
        return self._browser_context

    async def get_current_url(self) -> str | None:
        """
        Get the current URL of current page

        Returns:
            str | None: The current URL if any. None when the current page could not be obtained.
        """
        try:
            current_page: Page = await self.get_current_page()
            return current_page.url
        except Exception as e:
            # Best effort: callers get None instead of an error.
            logger.debug(f"Unable to determine the current URL: {e}")
        return None

    async def _get_or_open_page(self) -> Page:
        """
        Return the last non-closed page of the browser context, opening a new page if there is none.
        """
        browser: BrowserContext = await self.get_browser_context()  # type: ignore
        # Filter out closed pages
        open_pages: list[Page] = [page for page in browser.pages if not page.is_closed()]
        page: Page | None = open_pages[-1] if open_pages else None
        logger.debug(f"Current page: {page.url if page else None}")
        if page is None:
            page = await browser.new_page()
        return page

    async def get_current_page(self) -> Page:
        """
        Get the current page of the browser

        The current page is the last non-closed page of the browser context; a new page is opened when
        there is none. If that fails, the stored browser context is discarded and the lookup is retried
        exactly once with a newly created context. A failure of the retry propagates to the caller.

        Returns:
            Page: The current page.
        """
        try:
            return await self._get_or_open_page()
        except Exception:
            logger.warning("Browser context was closed. Creating a new one.")
            PlaywrightManager._browser_context = None
            return await self._get_or_open_page()

    async def close_all_tabs(self, keep_first_tab: bool = True):
        """
        Closes all tabs in the browser context, except for the first tab if `keep_first_tab` is set to True.

        Args:
            keep_first_tab (bool, optional): Whether to keep the first tab open. Defaults to True.
        """
        browser_context = await self.get_browser_context()
        pages: list[Page] = browser_context.pages  # type: ignore
        pages_to_close: list[Page] = pages[1:] if keep_first_tab else pages  # type: ignore
        for page in pages_to_close:  # type: ignore
            await page.close()  # type: ignore

    async def close_except_specified_tab(self, page_to_keep: Page):
        """
        Closes all tabs in the browser context, except for the specified tab.

        Args:
            page_to_keep (Page): The Playwright page object representing the tab that should remain open.
        """
        browser_context = await self.get_browser_context()
        for page in browser_context.pages:  # type: ignore
            if page != page_to_keep:  # Check if the current page is not the one to keep
                await page.close()  # type: ignore

    async def go_to_homepage(self):
        """
        Navigate the current page to the configured homepage.
        """
        page: Page = await self.get_current_page()
        await page.goto(self._homepage)

    async def set_navigation_handler(self):
        """
        Register the 'domcontentloaded' listeners and the DOM-mutation callback on the current page.
        """
        page: Page = await self.get_current_page()
        page.on("domcontentloaded", self.ui_manager.handle_navigation)  # type: ignore
        page.on("domcontentloaded", handle_navigation_for_mutation_observer)  # type: ignore
        await page.expose_function("dom_mutation_change_detected", dom_mutation_change_detected)  # type: ignore

    async def set_overlay_state_handler(self):
        """
        Expose the overlay state callbacks ('overlay_state_changed', 'show_steps_state_changed') on the browser context.
        """
        logger.debug("Setting overlay state handler")
        context = await self.get_browser_context()
        await context.expose_function('overlay_state_changed', self.overlay_state_handler)  # type: ignore
        await context.expose_function('show_steps_state_changed', self.show_steps_state_handler)  # type: ignore

    async def overlay_state_handler(self, is_collapsed: bool):
        """
        Record the overlay's collapsed state and, when it is expanded, refresh its chat history.

        Args:
            is_collapsed (bool): Whether the overlay is now collapsed.
        """
        page = await self.get_current_page()
        self.ui_manager.update_overlay_state(is_collapsed)
        if not is_collapsed:
            await self.ui_manager.update_overlay_chat_history(page)

    async def show_steps_state_handler(self, show_details: bool):
        """
        Forward a change of the overlay's "show details" setting to the UI manager.

        Args:
            show_details (bool): The new value of the setting.
        """
        page = await self.get_current_page()
        await self.ui_manager.update_overlay_show_details(show_details, page)

    async def set_user_response_handler(self):
        """
        Expose receive_user_response to pages of the browser context as 'user_response'.
        """
        context = await self.get_browser_context()
        await context.expose_function('user_response', self.receive_user_response)  # type: ignore

    async def notify_user(self, message: str, message_type: MessageType = MessageType.STEP):
        """
        Notify the user with a message.

        The message is always recorded through the UI manager. Whether it is also pushed to the page
        overlay and to the notification manager depends on the overlay's "show details" setting:
        STEP messages are only shown when details are enabled.

        Args:
            message (str): The message to notify the user with.
            message_type (MessageType, optional): Kind of message; PLAN, STEP, QUESTION and ANSWER
                get a descriptive prefix. Defaults to MessageType.STEP.
        """
        # Drop a single stray leading ':' and a single trailing ','.
        message = message.removeprefix(":")
        message = message.removesuffix(",")

        if message_type == MessageType.PLAN:
            message = beautify_plan_message(message)
            message = "Plan:\n" + message
        elif message_type == MessageType.STEP:
            if "confirm" in message.lower():
                message = "Verify: " + message
            else:
                message = "Next step: " + message
        elif message_type == MessageType.QUESTION:
            message = "Question: " + message
        elif message_type == MessageType.ANSWER:
            message = "Response: " + message

        safe_message = escape_js_message(message)
        self.ui_manager.new_system_message(safe_message, message_type)

        # Details hidden: everything except STEP (and unknown types) is shown.
        if self.ui_manager.overlay_show_details == False:  # noqa: E712
            if message_type not in (MessageType.PLAN, MessageType.QUESTION, MessageType.ANSWER, MessageType.INFO):
                return

        # Details shown: STEP messages are shown as well.
        if self.ui_manager.overlay_show_details == True:  # noqa: E712
            if message_type not in (MessageType.PLAN, MessageType.QUESTION, MessageType.ANSWER, MessageType.INFO, MessageType.STEP):
                return

        safe_message_type = escape_js_message(message_type.value)
        try:
            js_code = f"addSystemMessage({safe_message}, is_awaiting_user_response=false, message_type={safe_message_type});"
            page = await self.get_current_page()
            await page.evaluate(js_code)
        except Exception as e:
            logger.error(f"Failed to notify user with message \"{message}\". However, most likey this will work itself out after the page loads: {e}")

        self.notification_manager.notify(message, message_type.value)

    async def highlight_element(self, selector: str, add_highlight: bool):
        """
        Add or remove the 'agente-ui-automation-highlight' CSS class on the element matching selector.

        Failures are logged at debug level and otherwise ignored.

        Args:
            selector (str): Selector of the element to (un)highlight.
            add_highlight (bool): True to add the highlight class, False to remove it.
        """
        try:
            page: Page = await self.get_current_page()
            if add_highlight:
                # Add the 'agente-ui-automation-highlight' class to the element. This class is used to apply the fading border.
                await page.eval_on_selector(selector, '''e => {
                            let originalBorderStyle = e.style.border;
                            e.classList.add('agente-ui-automation-highlight');
                            e.addEventListener('animationend', () => {
                                e.classList.remove('agente-ui-automation-highlight')
                            });}''')
                logger.debug(f"Applied pulsating border to element with selector {selector} to indicate text entry operation")
            else:
                # Remove the 'agente-ui-automation-highlight' class from the element.
                await page.eval_on_selector(selector, "e => e.classList.remove('agente-ui-automation-highlight')")
                logger.debug(f"Removed pulsating border from element with selector {selector} after text entry operation")
        except Exception as e:
            # This is not significant enough to fail the operation
            logger.debug(f"Could not update highlight for element with selector {selector}: {e}")

    async def receive_user_response(self, response: str):
        """
        Store the user's response and wake up the coroutine waiting in prompt_user.

        Args:
            response (str): The text the user entered.
        """
        self.user_response = response  # Store the response for later use.
        logger.debug(f"Received user response to system prompt: {response}")
        # Notify event loop that the user's response has been received.
        self.user_response_event.set()

    async def prompt_user(self, message: str) -> str:
        """
        Prompt the user with a message and wait for a response.

        Args:
            message (str): The message to prompt the user with.

        Returns:
            str: The user's response.
        """
        logger.debug(f"Prompting user with message: \"{message}\"")
        #self.ui_manager.new_system_message(message)

        page = await self.get_current_page()

        await self.ui_manager.show_overlay(page)
        self.log_system_message(message, MessageType.QUESTION)  # add the message to history after the overlay is opened to avoid double adding it. add_system_message below will add it

        safe_message = escape_js_message(message)

        js_code = f"addSystemMessage({safe_message}, is_awaiting_user_response=true, message_type='question');"
        await page.evaluate(js_code)

        await self.user_response_event.wait()
        result = self.user_response
        logger.info(f"User prompt reponse to \"{message}\": {result}")
        # Reset the event and the stored response for the next prompt.
        self.user_response_event.clear()
        self.user_response = ""
        self.ui_manager.new_user_message(result)
        return result

    def set_take_screenshots(self, take_screenshots: bool):
        """Enable or disable screenshot capture in take_screenshots."""
        self._take_screenshots = take_screenshots

    def get_take_screenshots(self):
        """Return whether screenshot capture is enabled."""
        return self._take_screenshots

    def set_screenshots_dir(self, screenshots_dir: str):
        """Set the directory in which screenshots are saved."""
        self._screenshots_dir = screenshots_dir

    def get_screenshots_dir(self):
        """Return the directory in which screenshots are saved."""
        return self._screenshots_dir

    async def take_screenshots(self, name: str, page: Page | None, full_page: bool = True, include_timestamp: bool = True,
                               load_state: str = 'domcontentloaded', take_snapshot_timeout: int = 5 * 1000):
        """
        Save a PNG screenshot of a page into the screenshots directory.

        Does nothing unless screenshot capture has been enabled. Failures are logged, not raised.

        Args:
            name (str): Base file name (without extension).
            page (Page | None): Page to capture; the current page is used when None.
            full_page (bool, optional): Passed to Playwright's screenshot call. Defaults to True.
            include_timestamp (bool, optional): Prefix the file name with time.time_ns(). Defaults to True.
            load_state (str, optional): Load state to wait for before capturing. Defaults to 'domcontentloaded'.
            take_snapshot_timeout (int, optional): Timeout passed to both the wait and the screenshot
                call. Defaults to 5000.
        """
        if not self._take_screenshots:
            return
        if page is None:
            page = await self.get_current_page()

        screenshot_name = name

        if include_timestamp:
            screenshot_name = f"{time.time_ns()}_{screenshot_name}"
        screenshot_name += ".png"
        # os.path.join keeps an empty/unset directory relative to the working directory
        # instead of producing a path in the filesystem root.
        screenshot_path = os.path.join(self.get_screenshots_dir() or "", screenshot_name)
        try:
            await page.wait_for_load_state(state=load_state, timeout=take_snapshot_timeout)  # type: ignore
            await page.screenshot(path=screenshot_path, full_page=full_page, timeout=take_snapshot_timeout, caret="initial", scale="device")
            logger.debug(f"Screen shot saved to: {screenshot_path}")
        except Exception as e:
            logger.error(f"Failed to take screenshot and save to \"{screenshot_path}\". Error: {e}")

    def log_user_message(self, message: str):
        """
        Log the user's message.

        Args:
            message (str): The user's message to log.
        """
        self.ui_manager.new_user_message(message)

    def log_system_message(self, message: str, type: MessageType = MessageType.STEP):
        """
        Log a system message.

        Args:
            message (str): The system message to log.
            type (MessageType, optional): The kind of message. Defaults to MessageType.STEP.
        """
        self.ui_manager.new_system_message(message, type)

    async def update_processing_state(self, processing_state: str):
        """
        Update the processing state of the overlay.

        Args:
            processing_state (str): "init", "processing", "done"
        """
        page = await self.get_current_page()

        await self.ui_manager.update_processing_state(processing_state, page)

    async def command_completed(self, command: str, elapsed_time: float | None = None):
        """
        Notify the overlay that the command has been completed.

        Args:
            command (str): The command that finished.
            elapsed_time (float | None, optional): Forwarded to the UI manager. Defaults to None.
        """
        logger.debug(f"Command \"{command}\" has been completed. Focusing on the overlay input if it is open.")
        page = await self.get_current_page()
        await self.ui_manager.command_completed(page, command, elapsed_time)
|