AgentOccam/browser_env/html_tools/utils.py

from lxml import html
def get_xpath_top_down(element: html.HtmlElement, id_column: str='temp_id', label_column: str='temp_clickable_label', path: str='', order: int=0,
                        in_svg: bool=False, temp_id: int=0) -> tuple[int, dict[str, str], dict[str, bool]]:
    """Number every element under ``element`` and build an id <-> XPath lookup.

    The subtree is walked in document (pre-) order. Each visited element gets
    the next sequential id written into its ``id_column`` attribute (as a
    string) and an XPath computed as follows:

    * outside of an ``<svg>`` subtree, an element carrying an ``id`` attribute
      is addressed as ``//*[@id="..."]``;
    * otherwise the parent's path is extended with ``/tag`` (or
      ``/*[name()="tag"]`` inside SVG), followed by ``[k]`` when the element
      shares its tag with at least one sibling (``k`` is its 1-based position
      among the same-tag siblings).

    Nodes whose ``tag`` is not a string (lxml reports comments and processing
    instructions this way) are skipped: they receive no id and do not take
    part in sibling counting.

    NOTE: ``id`` values are interpolated into the XPath verbatim, so an id
    containing a double quote produces an invalid expression.

    Args:
        element: Root of the subtree to annotate. Its attributes are mutated.
        id_column: Attribute name that receives the sequential id.
        label_column: Attribute name whose existing non-empty values are
            collected into the returned label map.
        path: XPath of the parent of ``element`` ('' for a document root).
        order: 1-based position of ``element`` among same-tag siblings, or 0
            when no positional predicate should be emitted.
        in_svg: Whether ``element`` already lives inside an ``<svg>`` subtree.
        temp_id: First id to hand out.

    Returns:
        A tuple ``(next_id, i2xpath, used_labels)`` where ``next_id`` is the
        first id that was not used, ``i2xpath`` maps each id string to its
        XPath and also maps ``path``, ``'xpath/' + path`` and
        ``'xpath=/' + path`` back to the id string, and ``used_labels`` maps
        every non-empty ``label_column`` value found to ``True``.
    """
    used_labels, i2xpath = {}, {}
    if not isinstance(element.tag, str):
        # Comment / processing-instruction root: nothing to number.
        return temp_id, i2xpath, used_labels

    # Explicit stack instead of recursion: no RecursionError on deep trees and
    # no repeated merging of per-subtree dictionaries.
    pending = [(element, path, order, in_svg)]
    while pending:
        node, parent_path, position, inside_svg = pending.pop()

        # path
        tag = node.tag.lower()
        inside_svg = inside_svg or (tag == 'svg')

        if not inside_svg and 'id' in node.attrib:
            node_id = node.attrib['id']
            node_path = f'//*[@id="{node_id}"]'
        else:
            suffix = f'[{position}]' if position > 0 else ''
            prefix = f'*[name()="{tag}"]' if inside_svg else tag
            node_path = parent_path + '/' + prefix + suffix

        # add temp id (written before the label is read, as callers may pass
        # the same attribute name for both columns)
        node.attrib[id_column] = str(temp_id)
        ori_label = node.attrib.get(label_column, '')
        if ori_label != '':
            used_labels[ori_label] = True

        bid = str(temp_id)
        i2xpath[bid] = node_path
        i2xpath[node_path] = bid
        i2xpath[f'xpath/{node_path}'] = bid
        i2xpath[f'xpath=/{node_path}'] = bid

        temp_id += 1

        # Only real elements count for XPath positional predicates.
        children = [child for child in node if isinstance(child.tag, str)]
        tag_total = {}
        for child in children:
            ctag = child.tag.lower()
            tag_total[ctag] = tag_total.get(ctag, 0) + 1

        tag_seen = {}
        frames = []
        for child in children:
            ctag = child.tag.lower()
            tag_seen[ctag] = tag_seen.get(ctag, 0) + 1
            # A positional predicate is only needed when the tag is repeated.
            child_order = tag_seen[ctag] if tag_total[ctag] > 1 else 0
            frames.append((child, node_path, child_order, inside_svg))

        # Reverse so the first child is popped first (document order).
        pending.extend(reversed(frames))

    return temp_id, i2xpath, used_labels

def print_html_object(obj: str='') -> str:
    """Reformat an HTML string so that tags and text runs sit on separate lines.

    The input is scanned one character at a time. Every ``<`` starts a new
    output line and raises the indentation level by one; every ``>`` lowers it
    again. Text collected between brackets is emitted stripped on its own
    line, except for text directly between ``<`` and ``>`` (the tag body),
    which is copied verbatim. Characters after the final bracket are not
    emitted.

    Args:
        obj: The HTML text to reformat.

    Returns:
        The reformatted text ('' when ``obj`` contains no brackets).
    """
    pieces = []
    pending = []          # characters seen since the last bracket
    depth = 0
    indent = ''
    prev_bracket = None   # '<', '>' or None before the first bracket

    for ch in obj:
        if ch != '<' and ch != '>':
            pending.append(ch)
            continue

        text = ''.join(pending)
        pending = []
        stripped = text.strip()

        if ch == '<':
            pieces.append('\n')
            if stripped:
                pieces.append(indent + stripped + '\n')
            pieces.append(indent + '<')
            depth += 1
            indent = '  ' * depth
        else:
            closes_tag = prev_bracket == '<'
            if closes_tag:
                # Tag body: keep exactly as written.
                pieces.append(text)
            else:
                if prev_bracket == '>':
                    pieces.append('\n')
                if stripped:
                    pieces.append(indent + stripped + '\n')
            depth -= 1
            indent = '  ' * depth
            if not closes_tag:
                pieces.append(indent)
            pieces.append('>')

        prev_bracket = ch

    return ''.join(pieces)

def rect2tuple(rect: str) -> 'tuple[float, float, float, float] | None':
    """Parse a comma-separated string of four numbers into a tuple of floats.

    Args:
        rect: Text of the form ``"a,b,c,d"``; surrounding whitespace (of the
            whole string and of each field) is ignored.

    Returns:
        The four values as floats, or ``None`` when ``rect`` is not a string,
        does not contain exactly four comma-separated fields, or any field is
        not a valid float literal.
    """
    if not isinstance(rect, str):
        return None
    parts = rect.strip().split(',')
    if len(parts) != 4:
        return None
    try:
        return tuple(float(part) for part in parts)
    except ValueError:
        # Malformed numbers are treated like any other malformed input.
        return None