3595 lines
107 KiB
JSON
3595 lines
107 KiB
JSON
[
|
|
{
|
|
"task_id": 103,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Display the list of issues in the kkroening/ffmpeg-python repository that have labels related to questions",
|
|
"strategies": [
|
|
"Filter Issues by Label"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 109,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Presents the monthly count of successful orders from Jan to December 2022 in MM:COUNT format",
|
|
"strategies": [
|
|
"Navigate to Orders Report",
|
|
"Configure Report Parameters",
|
|
"Generate and Organize Report"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "January: 11 orders Feburary: 16 orders March: 14 orders April: 7 orders May: 8 orders June: 13 orders July: 9 orders August: 8 orders Sepetember: 10 orders Octorbor: 4 orders November: 5 orders December: 10 orders "
|
|
},
|
|
{
|
|
"task_id": 115,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Show me the name of the customers who have expressed dissatisfaction with Chloe tank",
|
|
"strategies": [
|
|
"Navigate to Product Reviews Report",
|
|
"Search for 'chloe tank' Product Reviews"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 117,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "What is the date when I made my first purchase on this site?",
|
|
"strategies": [
|
|
"Access the Order History to Locate the Earliest Order",
|
|
"Identify and View the Earliest Order's Details",
|
|
"Retrieve the Purchase Date from Order Details"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"View/Inspect Item Details",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US5",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "3/2/22"
|
|
},
|
|
{
|
|
"task_id": 123,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Tell me the reasons why customers like Circe's products",
|
|
"strategies": [
|
|
"Navigate to Product Reviews Report",
|
|
"Analyze Product Reviews Data",
|
|
"Access and Review Customer Feedback"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Analyze/Evaluate/Verify Data",
|
|
"View/Inspect Item Details"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US7",
|
|
"US5"
|
|
],
|
|
"reference_answer_raw_annotation": "Warm and comfortable. True to size."
|
|
},
|
|
{
|
|
"task_id": 125,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "What is the price range of teeth grinding mouth guard in the One Stop Market?",
|
|
"strategies": [
|
|
"Search for Teeth Grinding Mouth Guards",
|
|
"Sort Results by Price",
|
|
"Navigate Through Pages to Determine Price Range"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Search/Filter/Sort Data",
|
|
"Navigate Within Data/Results"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US2",
|
|
"US8"
|
|
],
|
|
"reference_answer_raw_annotation": "$1.46 - $85"
|
|
},
|
|
{
|
|
"task_id": 127,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "What brands appear most frequently among the top search terms?",
|
|
"strategies": [
|
|
"Navigate to the Search Terms Report",
|
|
"Sort and Analyze Search Term Data"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "Hollister, Joust, Antonia"
|
|
},
|
|
{
|
|
"task_id": 131,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "What's the total number of items sold in the most recent 7 orders?",
|
|
"strategies": [
|
|
"Navigate to the Orders Page",
|
|
"Prepare Order List for Analysis",
|
|
"Extract Item Count from Recent Orders",
|
|
"Return to Main Context"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Extract/Retrieve Information",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US6",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "25"
|
|
},
|
|
{
|
|
"task_id": 139,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "What is the estimated driving time between the hometown of Joe Biden and Bridgeport?",
|
|
"strategies": [
|
|
"Locate Joe Biden's Hometown via Wikipedia",
|
|
"Set Up Route Planning in OpenStreetMap",
|
|
"Retrieve Driving Time Estimate"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "3h 20min"
|
|
},
|
|
{
|
|
"task_id": 149,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "What is the color configuration of the artifical plants I bought Feb 2023",
|
|
"strategies": [
|
|
"Access Account and Navigate to Order History",
|
|
"Locate February 2023 Orders",
|
|
"Extract Product Color Information"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "Green-vines"
|
|
},
|
|
{
|
|
"task_id": 15,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Tell me the the number of reviews that our store received by far that mention term \"best\"",
|
|
"strategies": [
|
|
"Navigate to Product Reviews Report",
|
|
"Filter Reviews Containing 'best'",
|
|
"Retrieve Results"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "2"
|
|
},
|
|
{
|
|
"task_id": 155,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "What is the minimum travel time by car from Animal Rescue League of Pittsburgh to Schenley park?",
|
|
"strategies": [
|
|
"Search for Starting Point",
|
|
"Initiate Directions from Starting Point",
|
|
"Configure and Generate Route"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US4",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "9min"
|
|
},
|
|
{
|
|
"task_id": 156,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Checkout merge requests assigned to me",
|
|
"strategies": [
|
|
"Use Search to Filter Merge Requests"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 157,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Show all customers",
|
|
"strategies": [
|
|
"Access the \"All Customers\" List"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 162,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 40 cards",
|
|
"strategies": [
|
|
"Search for Nintendo Switch Card Storage Solutions",
|
|
"Evaluate Product Options"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 167,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "What are the main criticisms of this product? Please extract the relevant sentences.",
|
|
"strategies": [
|
|
"Access Customer Reviews Section",
|
|
"Analyze Review Content",
|
|
"Verify Rating Context"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Analyze/Evaluate/Verify Data",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US7",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "The wireless connection works on a whim (about 40% of the time I've owned it). It seems to constantly run out of ink. Cartridge prices are less than some printers I've had, but now I understand why. This printer seems to have more reasons NOT to work (none that are findable or correctable) Ex: error boxes saying that it's out of paper when it automatically switches to photo printing for some reason. Scanner is as slow as my first scanner I ever owned in the mid-90's. For the $176 I paid, there isn't even a fax component on it. I guess the \"PLUS\" part of it's name is in reference to the migraines it causes when you can't figure out the new reason why it's not working for the 10th time in the past 2 months."
|
|
},
|
|
{
|
|
"task_id": 169,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Tell me the full names of the repositories where I made contributions and they got the most stars?",
|
|
"strategies": [
|
|
"Navigate to User's Contributions",
|
|
"Identify High-Star Repositories"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "a11yproject.com, Primer/design"
|
|
},
|
|
{
|
|
"task_id": 173,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Open my latest updated issue that has keyword \"better\" in its title to check if it is closed",
|
|
"strategies": [
|
|
"Navigate to Assigned Issues",
|
|
"Filter Issues by Keyword 'better'",
|
|
"Sort Issues by Latest Update",
|
|
"Open and Verify Target Issue"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Search/Filter/Sort Data",
|
|
"View/Inspect Item Details"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US2",
|
|
"US5"
|
|
],
|
|
"reference_answer_raw_annotation": "Not closed"
|
|
},
|
|
{
|
|
"task_id": 182,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Open my latest created issue that has homepage content in its title to check if it is closed",
|
|
"strategies": [
|
|
"Access the Issues List",
|
|
"Filter and Sort Issues",
|
|
"Open and Verify Target Issue"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"View/Inspect Item Details"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US5"
|
|
],
|
|
"reference_answer_raw_annotation": "closed"
|
|
},
|
|
{
|
|
"task_id": 184,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Give me the name of the products that have 0 units left",
|
|
"strategies": [
|
|
"Navigate to the Products Section",
|
|
"Apply Quantity Filter"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": "Sinbad Fitness Tank"
|
|
},
|
|
{
|
|
"task_id": 190,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Tell me the total cost of my latest complete order?",
|
|
"strategies": [
|
|
"Access Order History",
|
|
"Retrieve Latest Order Details"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"View/Inspect Item Details"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US5"
|
|
],
|
|
"reference_answer_raw_annotation": "65.32"
|
|
},
|
|
{
|
|
"task_id": 196,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Compare the payment difference of the last 4 cancelled orders and completed orders",
|
|
"strategies": [
|
|
"Navigate to Orders Report",
|
|
"Configure Date Range",
|
|
"Analyze Completed Orders",
|
|
"Analyze Canceled Orders",
|
|
"Compare Payments"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Analyze/Evaluate/Verify Data",
|
|
"Analyze/Evaluate/Verify Data",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3",
|
|
"US7",
|
|
"US7",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "194.25"
|
|
},
|
|
{
|
|
"task_id": 202,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Get the date of the most recent canlled order",
|
|
"strategies": [
|
|
"Navigate to Orders Report",
|
|
"Filter for Canceled Orders",
|
|
"Set Date Range to Capture Recent Orders",
|
|
"Generate and Analyze Report"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Configure Parameters/Settings",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US3",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "May 23, 2023"
|
|
},
|
|
{
|
|
"task_id": 205,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "How many commits did kilian make on 3/5/2023?",
|
|
"strategies": [
|
|
"Navigate to the Commits Section",
|
|
"Filter Commits by Author",
|
|
"Locate Commits on March 5, 2023"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": "1"
|
|
},
|
|
{
|
|
"task_id": 215,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "What are the key aspects that the customers don't like about Circe ice fleece",
|
|
"strategies": [
|
|
"Identify Search-Related Issues",
|
|
"Analyze Product Configuration",
|
|
"Evaluate Customer Feedback"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Analyze/Evaluate/Verify Data",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US7",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "Material quality, fit, insufficient warmth, color"
|
|
},
|
|
{
|
|
"task_id": 220,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "Show me the walking distance from nearby hotels to Gardner Steel Conference Center, that take at most 5 minutes?",
|
|
"strategies": [
|
|
"Locate Gardner Steel Conference Center",
|
|
"Identify Nearby Hotels",
|
|
"Calculate Walking Routes"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "Wyndham Pittsburgh University Cente: 375 m\nThe Oaklander Hotel: 338 m"
|
|
},
|
|
{
|
|
"task_id": 221,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "I am at CMU Pittsburgh, how long it takes to the nearest USPS postal office with different transportation methods?",
|
|
"strategies": [
|
|
"Locate CMU on OpenStreetMap",
|
|
"Find nearest USPS post office",
|
|
"Calculate travel times"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "Walk: 1 minute to walk and\nDrive: less than 1 minute\nBike: less than 1 minute"
|
|
},
|
|
{
|
|
"task_id": 225,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "What do customers say about brush from sephora",
|
|
"strategies": [
|
|
"Search for 'sephora brush' products",
|
|
"Access product details"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"View/Inspect Item Details"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US5"
|
|
],
|
|
"reference_answer_raw_annotation": "N/A"
|
|
},
|
|
{
|
|
"task_id": 227,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "What is the price range for products from EYZUTAK?",
|
|
"strategies": [
|
|
"Search for EYZUTAK Products",
|
|
"Access Product Page",
|
|
"Explore Product Variants for Pricing"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Navigate To Page/Section",
|
|
"View/Inspect Item Details"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US1",
|
|
"US5"
|
|
],
|
|
"reference_answer_raw_annotation": "$9.99"
|
|
},
|
|
{
|
|
"task_id": 23,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "List out reviewers, if exist, who mention about good fingerprint resistant",
|
|
"strategies": [
|
|
"Access Customer Reviews Section",
|
|
"Inspect Reviews for Fingerprint Resistance Mentions"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "Rachel, T. Gannon, "
|
|
},
|
|
{
|
|
"task_id": 235,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Get the order number of my most recent under delivery order ",
|
|
"strategies": [
|
|
"Navigate to the My Orders Page",
|
|
"Identify the Most Recent Under-Delivery Order"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": "N/A"
|
|
},
|
|
{
|
|
"task_id": 236,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "Where is the nearest pharmacy from Carnegie Mellon I can walk within 20mins",
|
|
"strategies": [
|
|
"Locate Carnegie Mellon University on OpenStreetMap",
|
|
"Search for Pharmacies Near Carnegie Mellon University",
|
|
"Identify and Select the Nearest Pharmacy"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "Schiller's Pharmacy, 811, South Aiken Avenue, Shadyside, Pittsburgh, Allegheny County, 15232, United States"
|
|
},
|
|
{
|
|
"task_id": 240,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "I am doing a market survey for one stop market, show me the most expensive product from competitive swimwear category",
|
|
"strategies": [
|
|
"Search for Competitive Swimwear",
|
|
"Sort Results by Price",
|
|
"Optimize Results Display",
|
|
"Verify Across Pages"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Search/Filter/Sort Data",
|
|
"Update/Modify Item",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US2",
|
|
"US10",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 247,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Show me the email address of the customer who is the most unhappy with the style of Zoe products",
|
|
"strategies": [
|
|
"Locate Negative Reviews for Zoe Products",
|
|
"Retrieve Customer Email"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "N/A"
|
|
},
|
|
{
|
|
"task_id": 250,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "Tell me the coordinates of Apple Store near Pitt in DD format",
|
|
"strategies": [
|
|
"Search for Apple Store in Pittsburgh via OpenStreetMap",
|
|
"Retrieve coordinates in DD format"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "40.4511693, -79.9334241"
|
|
},
|
|
{
|
|
"task_id": 254,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "What is the phone number of Western Pennsylvania Hospital",
|
|
"strategies": [
|
|
"Search for the Hospital",
|
|
"Access Hospital Details",
|
|
"Retrieve Phone Number"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"View/Inspect Item Details",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US5",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "4125785000"
|
|
},
|
|
{
|
|
"task_id": 258,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "See all public projects",
|
|
"strategies": [
|
|
"Navigate to Public Projects via Explore Section"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 259,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Get me my RSS feed token",
|
|
"strategies": [
|
|
"Navigate to Access Tokens Section",
|
|
"Retrieve RSS Feed Token"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "TMN_bBn9Z48qVbUFZV45"
|
|
},
|
|
{
|
|
"task_id": 268,
|
|
"sites": [
|
|
"wikipedia",
|
|
"map"
|
|
],
|
|
"intent": "What's the closest national park to Vinalhaven, ME? How long does it take to bike there?",
|
|
"strategies": [
|
|
"Locate Vinalhaven, ME on OpenStreetMap",
|
|
"Confirm location and access detailed information",
|
|
"Find Acadia National Park and calculate biking route"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"View/Inspect Item Details",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US5",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "Acadia National Park\n10h 33min"
|
|
},
|
|
{
|
|
"task_id": 27,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the Showerthoughts forum.",
|
|
"strategies": [
|
|
"Navigate to Showerthoughts Forum and Find Latest Post",
|
|
"Access the Author's Profile",
|
|
"Analyze Comments for Downvote Count"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Navigate To Page/Section",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US1",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "0"
|
|
},
|
|
{
|
|
"task_id": 270,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Show me products under $30 in \"men shoes\" category",
|
|
"strategies": [
|
|
"Navigate to Men's Shoes Category",
|
|
"Filter Products by Price",
|
|
"Optimize Product Display"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 276,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Search for \"switch accessories\"",
|
|
"strategies": [
|
|
"Search for 'switch accessories'"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 279,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Provide me with the complete names of Bluetooth headphones from Sony, and also share the price range for the available models",
|
|
"strategies": [
|
|
"Initiate Search for Sony Bluetooth Headphones",
|
|
"Organize Product Listings",
|
|
"Collect Product Data"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Search/Filter/Sort Data",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US2",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "These models are avaiable: SONY WH1000XM3 Bluetooth Wireless Noise Canceling Headphones Silver WH-1000XM3/S (Renewed) Sony WH-CH710N/H Wireless Bluetooth Noise Cancelling Headphones Sony WH-1000XM3B Wireless Bluetooth Noise-Canceling Over-Ear Headphones (Black) Basic Headphone Bundle Kit with Stylus Sony Wireless Headphones WH-CH510: Wireless Bluetooth On-Ear Headset with Mic for Phone-Call, Black Sony WHCH710N Wireless Bluetooth Noise Canceling Over-The-Ear Headphones (Black) with Kratos 18W PD Two-Port Power Adapter and Kratos 6-Feet Nylon Braided USB-C Cable Bundle (3 Items) Sony WI-SP500 Wireless in-Ear Sports Headphones, White (WISP500/W) Sony WI-SP510 Extra BASS Wireless in-Ear Headset/Headphones with mic for Phone Call Sports IPX5 Bluetooth, Black (WISP510/B) Sony MDRAS600BT Active Sports Bluetooth Headset (Black) Sony WH-1000XM4 Wireless Noise Canceling Over-Ear Headphones (Black) with Sony WLA-NS7 Wireless TV Adapter Bundle (2 Items) Sony WI-C300 Wireless In-Ear Headphones, Red (WIC300/R) Sony XB950N1 Extra Bass Wireless Noise Canceling Headphones, Black SONY - H900N Hi-Res Noise Cancelling Wireless Headphone Grayish Black Renewed The price ranges from $18.99 to $406 "
|
|
},
|
|
{
|
|
"task_id": 283,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Look up the most recent models of XBox controllers released between 2020-2021?",
|
|
"strategies": [
|
|
"Perform Initial Search for Xbox Controllers",
|
|
"Refine Search to Target Specific Models",
|
|
"Navigate Through Search Result Pages",
|
|
"Inspect Product Details for Release Dates"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Search/Filter/Sort Data",
|
|
"Navigate Within Data/Results",
|
|
"View/Inspect Item Details"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US2",
|
|
"US8",
|
|
"US5"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 285,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Show the least expensive switch card holder with a minimum storage capacity of 15 cards.",
|
|
"strategies": [
|
|
"Perform Initial Search for 'switch card holder'",
|
|
"Optimize Search Results for Price and Capacity Evaluation",
|
|
"Identify and Validate the Least Expensive Qualifying Product"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Analyze/Evaluate/Verify Data",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US7",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 287,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "How much time does it take from Pittsburgh to Philadelphia by car?",
|
|
"strategies": [
|
|
"Locate Pittsburgh on the map",
|
|
"Configure route parameters",
|
|
"Calculate and retrieve driving time"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "5h 47min"
|
|
},
|
|
{
|
|
"task_id": 288,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Tell me the name of the customer who has the most cancellations in the history",
|
|
"strategies": [
|
|
"Navigate to Customer Order Report",
|
|
"Configure Report Parameters",
|
|
"Generate and Analyze Report"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "Samantha Jones"
|
|
},
|
|
{
|
|
"task_id": 296,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Show me the command to clone the best GAN python implementation with SSH.",
|
|
"strategies": [
|
|
"Search and Access the PyTorch-GAN Project",
|
|
"Clone the Repository Using SSH"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "ssh://git@metis.lti.cs.cmu.edu:2222/eriklindernoren/PyTorch-GAN.git"
|
|
},
|
|
{
|
|
"task_id": 305,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "How many commits did Philip make in 2023/1?",
|
|
"strategies": [
|
|
"Navigate to the Commits Section",
|
|
"Filter Commits by Author and Date",
|
|
"Count Filtered Commits"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "0"
|
|
},
|
|
{
|
|
"task_id": 313,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Which number to call for the customer service?",
|
|
"strategies": [
|
|
"Navigate to the Contact Us Page",
|
|
"Locate the Customer Service Phone Number"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "N/A"
|
|
},
|
|
{
|
|
"task_id": 321,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "How much refund I should expect from my order canlled in 2022, including shipping fee",
|
|
"strategies": [
|
|
"Access Order History",
|
|
"Locate Canceled 2022 Order",
|
|
"Review Order Details for Refund Information"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "3053.97"
|
|
},
|
|
{
|
|
"task_id": 324,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Show me the \"chairs\" listings by ascending price.",
|
|
"strategies": [
|
|
"Search for 'chairs'",
|
|
"Sort results by ascending price"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 33,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hilton hotel in the vicinity, if available. Then, tell me the the shortest walking distance to a supermarket from the hotel.",
|
|
"strategies": [
|
|
"Locate Hilton Hotel near Pittsburgh Airport",
|
|
"Identify Nearest Supermarket to the Hotel",
|
|
"Calculate Shortest Walking Route"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "DoubleTree by Hilton Hotel Pittsburgh Airport Distance: 1.4km"
|
|
},
|
|
{
|
|
"task_id": 333,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "How much did I spend on shopping at One Stop Market on November 2022? They gave me a 20% discount on the total amount for orders exceeding $200 in cash",
|
|
"strategies": [
|
|
"Access Order History",
|
|
"Identify and Calculate Discounted Orders"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "359.546"
|
|
},
|
|
{
|
|
"task_id": 335,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Tell me when I last ordered my body butter?",
|
|
"strategies": [
|
|
"Access Order History via My Account",
|
|
"Review Order Details",
|
|
"Navigate Multi-page Results"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"View/Inspect Item Details",
|
|
"Navigate Within Data/Results"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US5",
|
|
"US8"
|
|
],
|
|
"reference_answer_raw_annotation": "January 16th 2023"
|
|
},
|
|
{
|
|
"task_id": 341,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "List all opened issues requesting new features",
|
|
"strategies": [
|
|
"Navigate to Issues Section",
|
|
"Filter by Feature Request Label"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 348,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "How many reviews our shop received in May 2023?",
|
|
"strategies": [
|
|
"Navigate to Advanced Reporting & Product Reviews Report",
|
|
"Filter Reviews by May 2023 Date Range",
|
|
"Execute Search & Retrieve Results"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "0"
|
|
},
|
|
{
|
|
"task_id": 349,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Who else have access to my repo gimmiethat.space, show me their usernames",
|
|
"strategies": [
|
|
"Navigate to Target Project",
|
|
"Access Member Permissions",
|
|
"Identify Authorized Users"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"View/Inspect Item Details",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US5",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "yjlou"
|
|
},
|
|
{
|
|
"task_id": 354,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "List products from living room furtniture category by descending price",
|
|
"strategies": [
|
|
"Navigate to Living Room Furniture Category",
|
|
"Sort Products by Descending Price"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 357,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Checkout merge requests requiring my review",
|
|
"strategies": [
|
|
"Access Merge Requests Section",
|
|
"Filter Merge Requests Requiring Review"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 361,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Show me the order statuses for order number 170 and 189.",
|
|
"strategies": [
|
|
"Access Order History"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1"
|
|
],
|
|
"reference_answer_raw_annotation": "170: cancelled, 189: pending"
|
|
},
|
|
{
|
|
"task_id": 367,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "Measure distance between Carnegie Mellon University and CVS (closet one) by walking",
|
|
"strategies": [
|
|
"Initiate Directions Between Two Points",
|
|
"Locate Target CVS Pharmacy",
|
|
"Calculate Walking Route"
|
|
],
|
|
"universal_strategies": [
|
|
"Configure Parameters/Settings",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US3",
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "1.4km"
|
|
},
|
|
{
|
|
"task_id": 368,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "find discounted items.",
|
|
"strategies": [
|
|
"Access Advanced Search Functionality",
|
|
"Execute Discount-Focused Search",
|
|
"Validate Discounted Products"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "There is no function to show only discount items."
|
|
},
|
|
{
|
|
"task_id": 369,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "Pull up the description page of Carnegie Music Hall on Map",
|
|
"strategies": [
|
|
"Close Welcome Modal and Search for Carnegie Music Hall",
|
|
"Access Description Page on Map",
|
|
"Verify External Details (Optional)"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"View/Inspect Item Details",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US5",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 37,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "Check if the police station in pittsburgh can be reached in one hour by car from gates building at CMU",
|
|
"strategies": [
|
|
"Access Directions Feature",
|
|
"Input Initial Route Points",
|
|
"Correct Route Direction",
|
|
"Generate and Review Route"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Configure Parameters/Settings",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3",
|
|
"US3",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "Yes"
|
|
},
|
|
{
|
|
"task_id": 374,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Preview the Magento Blank theme for my shop",
|
|
"strategies": [
|
|
"Navigate to Themes Section",
|
|
"Select Magento Blank Theme",
|
|
"Review Theme Configuration"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item",
|
|
"View/Inspect Item Details"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US10",
|
|
"US5"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 376,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Summarize customer reviews for Amazon Echo Dot 3rd generation.",
|
|
"strategies": [
|
|
"Navigate to Amazon Echo Dot 3rd Gen Product Page",
|
|
"Access Customer Reviews"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Navigate To Page/Section"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US1"
|
|
],
|
|
"reference_answer_raw_annotation": "N/A"
|
|
},
|
|
{
|
|
"task_id": 381,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "Find the hotel around Carnegie Music Hall",
|
|
"strategies": [
|
|
"Search for Hotels Near Carnegie Music Hall"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 382,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "I am arriving at Carnegie Mellon University. Find the nearby US Citizenship and Immigration Services and the walking distance to the nearest Social Security Administration from US Citizenship and Immigration Services",
|
|
"strategies": [
|
|
"Locate the USCIS Office Near Carnegie Mellon University",
|
|
"Find the Nearest Social Security Administration (SSA) Office from USCIS",
|
|
"Calculate Walking Distance from USCIS to SSA"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "N/A"
|
|
},
|
|
{
|
|
"task_id": 383,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "I am arriving at Pittsburgh Airport. Show me the name of a Hyatt hotel if there is any nearby. Tell me the names of supermarkets that are within 15mins driving from the hotel",
|
|
"strategies": [
|
|
"Locate Hyatt Hotel near Pittsburgh Airport",
|
|
"Find supermarkets within 15 minutes drive from the Hyatt"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": "Hyatt Regency Pittsburgh International Airport Giant Eagle, ALDI"
|
|
},
|
|
{
|
|
"task_id": 384,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "List the customer names who complain about the quality of EYZUTAK phone cases",
|
|
"strategies": [
|
|
"Navigate to the EYZUTAK Phone Case Product Page",
|
|
"Access Customer Reviews Section",
|
|
"Navigate Through Review Pages",
|
|
"Identify and Extract Complainant Names"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Navigate To Page/Section",
|
|
"Navigate Within Data/Results",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US1",
|
|
"US8",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "Lisa Lee, Evelyn Kurver, Amanda, N Randall"
|
|
},
|
|
{
|
|
"task_id": 386,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "What is the rating of Ugreen lightning to 3.5mm cable. Please round to the nearest whole number",
|
|
"strategies": [
|
|
"Search for the Product",
|
|
"Access Product Details",
|
|
"Retrieve Product Rating"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"View/Inspect Item Details",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US5",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "Around 3.1 stars"
|
|
},
|
|
{
|
|
"task_id": 387,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Who gave 4 or 5 stars for phone cases from EYZUTAK",
|
|
"strategies": [
|
|
"Search for EYZUTAK Phone Cases",
|
|
"Access Product Details and Reviews",
|
|
"Identify 4/5-Star Reviewers"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"View/Inspect Item Details",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US5",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "MH, Misba009, Amanda, Amazon Customer, Cally, Bethany Robertson, Bethany Robertson"
|
|
},
|
|
{
|
|
"task_id": 392,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Post \"Good idea\" for the merge request related to color ulitity in a11yproject.com project",
|
|
"strategies": [
|
|
"Navigate to the Project's Merge Requests Section",
|
|
"Locate the Target Merge Request",
|
|
"Post the Comment"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 4,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "What are the top-3 best-selling product in Jan 2023",
|
|
"strategies": [
|
|
"Navigate to Bestsellers Report",
|
|
"Configure Report Parameters for January 2023",
|
|
"Generate and Review Bestsellers Report"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "Impulse Duffle, Overnight Duffle, Hawkeye Yoga Short-32-Blue"
|
|
},
|
|
{
|
|
"task_id": 404,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Upvote the newest post in books subreddit",
|
|
"strategies": [
|
|
"Navigate to the Books Forum",
|
|
"Sort Posts by Newest",
|
|
"Upvote the Newest Post"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 419,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Set my gitlab status as Enjoying life.",
|
|
"strategies": [
|
|
"Access User Menu and Open Status Modal",
|
|
"Input and Save Custom Status",
|
|
"Validate Status Update"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US10",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 423,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Mark all Hollister shirts on sale",
|
|
"strategies": [
|
|
"Navigate to Product Management",
|
|
"Search for Hollister Shirts",
|
|
"Bulk Selection and Status Update"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 426,
|
|
"sites": [
|
|
"wikipedia",
|
|
"map"
|
|
],
|
|
"intent": "Find the page of the place in Pennsylvania where a plane crashed during the September 11th attacks on the map.",
|
|
"strategies": [
|
|
"Access Wikipedia to Identify the 9/11 Pennsylvania Plane Crash Site",
|
|
"Locate Crash Site on OpenStreetMap"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Navigate To Page/Section"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US1"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 43,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "List the top 3 search terms in my store",
|
|
"strategies": [
|
|
"Navigate to the Search Terms Report",
|
|
"Identify Top 3 Search Terms"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "hollister, Joust Bag, Antonia Race Tank"
|
|
},
|
|
{
|
|
"task_id": 44,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Check out my todos",
|
|
"strategies": [
|
|
"Access User Profile and Navigation",
|
|
"Search for Target Project",
|
|
"Access TODO Project",
|
|
"Review Assigned Todos"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Navigate To Page/Section",
|
|
"View/Inspect Item Details"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US1",
|
|
"US5"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 440,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "I previously ordered some a make up removal kit during summer 2022 and later cancelled. Can you reorder it for me?",
|
|
"strategies": [
|
|
"Access Order History to Locate the Canceled Order",
|
|
"Search and Add the Product Manually",
|
|
"Adjust Cart and Complete Purchase"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 443,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Update the project site's title to \"Not an interesting site\"",
|
|
"strategies": [
|
|
"Access and Edit the index.html File",
|
|
"Commit the Changes"
|
|
],
|
|
"universal_strategies": [
|
|
"Update/Modify Item",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US10",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 448,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "set the homepage URL on my GitLab profile to https://egg.tart.com",
|
|
"strategies": [
|
|
"Access Profile Settings",
|
|
"Update Homepage URL"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 454,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Disable Ryker Tee Crew Neck from the site, they are facing some quality issues.",
|
|
"strategies": [
|
|
"Navigate to Products Management Section",
|
|
"Search for Target Product",
|
|
"Access Product Edit Page",
|
|
"Disable Product and Save Changes"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US1",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 458,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Reduce the price of this product by $5",
|
|
"strategies": [
|
|
"Modify Product Price"
|
|
],
|
|
"universal_strategies": [
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 46,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Check out the most recent open issues",
|
|
"strategies": [
|
|
"Navigate to the Issues Section",
|
|
"Filter and Sort Open Issues"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 464,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Update the product description of Antonia Racer Tank to highlight the real user positive reviews by quoting the comments",
|
|
"strategies": [
|
|
"Navigate to the Antonia Racer Tank Product Edit Page",
|
|
"Extract Positive User Reviews",
|
|
"Integrate Reviews into Product Description"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Extract/Retrieve Information",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US6",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 466,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Add 2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts to my wish list",
|
|
"strategies": [
|
|
"Search for the Product",
|
|
"Add Product to Wish List"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 471,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Cancel order 307",
|
|
"strategies": [
|
|
"Navigate to Order 307 Details",
|
|
"Cancel the Order"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 476,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Set up a new, empty repository with the name awesome_llm_reading?",
|
|
"strategies": [
|
|
"Initiate Project Creation",
|
|
"Configure Project Details",
|
|
"Finalize Project Creation"
|
|
],
|
|
"universal_strategies": [
|
|
"Execute Action/Process",
|
|
"Configure Parameters/Settings",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US4",
|
|
"US3",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 48,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Today is 6/12/2023. Tell me how many fulfilled orders I have over the past three days, and the total amount of money I spent.",
|
|
"strategies": [
|
|
"Access Order History",
|
|
"Paginate Through Orders",
|
|
"Review and Calculate Totals"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Navigate Within Data/Results",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US8",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "0 order, $0 total spend"
|
|
},
|
|
{
|
|
"task_id": 485,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Vinta wants to check my dotfile configurations. Please invite him to the repo as a guest.",
|
|
"strategies": [
|
|
"Navigate to the Dotfiles Project",
|
|
"Access Project Members Section",
|
|
"Invite Vinta as a Guest"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US1",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 488,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Change the page title of \"Home Page\" page on my site to \"This is the home page!! Leave here!!\".",
|
|
"strategies": [
|
|
"Navigate to the Home Page Edit Interface",
|
|
"Modify and Save the Page Title"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 497,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Update order #307 with the DHL tracking number 24353446464",
|
|
"strategies": [
|
|
"Navigate to Order #307",
|
|
"Update Shipment with Tracking Number"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 505,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Make all Aeno capri as out of stock",
|
|
"strategies": [
|
|
"Navigate to Products Section",
|
|
"Search for Aeno Capri Products",
|
|
"Update Stock Status to Out of Stock"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 506,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Buy the highest rated product from the meat substitute category within a budget between 100 and 200.",
|
|
"strategies": [
|
|
"Navigate to Meat Substitutes Category",
|
|
"Filter Products by Budget",
|
|
"Select Highest-Rated Product",
|
|
"Proceed to Checkout",
|
|
"Complete Order Placement"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process",
|
|
"Execute Action/Process",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US4",
|
|
"US4",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 509,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Buy the best rating product from \"Men's shoe\" category with at least 5 reviews and the product is least expensive",
|
|
"strategies": [
|
|
"Navigate to Men's Shoes Category",
|
|
"Sort Products by Price",
|
|
"Browse and Filter Products",
|
|
"Select Product and Options",
|
|
"Add to Cart and Checkout"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US2",
|
|
"US4",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 514,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Add a white desk to my wish list.",
|
|
"strategies": [
|
|
"Search for 'white desk'",
|
|
"Add product to wish list"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 516,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Add this product to my wishlist",
|
|
"strategies": [
|
|
"Access Account and Wishlist Management",
|
|
"Return to Product Page",
|
|
"Add Product to Wishlist"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US1",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 521,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Subscribe to the newsletter of OneStopMarket",
|
|
"strategies": [
|
|
"Access Account Dashboard",
|
|
"Manage Newsletter Subscriptions"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 524,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Star the top eight most stared repos in Gitlab",
|
|
"strategies": [
|
|
"Navigate to Most Starred Projects",
|
|
"Star Top Eight Projects"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 528,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Draft a refund message via their \"contact us\" form for the phone screen protector I bought March 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet",
|
|
"strategies": [
|
|
"Access Order Details to Retrieve Order ID",
|
|
"Navigate to Contact Form",
|
|
"Draft Refund Message"
|
|
],
|
|
"universal_strategies": [
|
|
"View/Inspect Item Details",
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US5",
|
|
"US1",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 534,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Follow ['Jakub Klinkovský', 'Koushik', 'Vinta Chen'] on Gitlab",
|
|
"strategies": [
|
|
"Follow Jakub Klinkovský (lahwaacz)",
|
|
"Follow Vinta Chen"
|
|
],
|
|
"universal_strategies": [
|
|
"Execute Action/Process",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US4",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 538,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Modify the address of order #299 to 456 Oak Avenue, Apartment 5B, New York, NY, 10001",
|
|
"strategies": [
|
|
"Navigate to Order #299 Details",
|
|
"Edit the Order Address",
|
|
"Save the Updated Address"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US10",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 548,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Add a new color blue to size S and M of Frankie Sweatshirt",
|
|
"strategies": [
|
|
"Navigate to Frankie Sweatshirt Product Page",
|
|
"Clean Existing Blue Variants",
|
|
"Add Blue Color to Target Sizes"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US10",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 56,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "How long does it take to walk from Carnegie Museum of Art to a library at CMU?",
|
|
"strategies": [
|
|
"Initialize Mapping Context",
|
|
"Locate Carnegie Museum of Art",
|
|
"Identify Target Library",
|
|
"Calculate Walking Route"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US1",
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "11 min"
|
|
},
|
|
{
|
|
"task_id": 566,
|
|
"sites": [
|
|
"gitlab",
|
|
"reddit"
|
|
],
|
|
"intent": "create a repository named Do it myself that includes a README file with the links to the most active 8 DIY ideas on DIY subreddit?",
|
|
"strategies": [
|
|
"Create GitLab Repository 'Do it myself' with README",
|
|
"Collect Top 8 Active DIY Post URLs from Reddit",
|
|
"Edit README with DIY Post Links and Commit"
|
|
],
|
|
"universal_strategies": [
|
|
"Create Item",
|
|
"Extract/Retrieve Information",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US9",
|
|
"US6",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 574,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "I recently moved, my address is 111 Magnolia Path, Atlanta, GA, 30303, update my information on OneStopShopping accordingly",
|
|
"strategies": [
|
|
"Navigate to Address Management",
|
|
"Edit Existing Address",
|
|
"Update Address Fields",
|
|
"Finalize Changes"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item",
|
|
"Update/Modify Item",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US10",
|
|
"US10",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 577,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Add the following users to my GitHub timeline item management extension as maintainer: ['abisubramanya27', 'lahwaacz']",
|
|
"strategies": [
|
|
"Navigate to Project Members Section",
|
|
"Invite Users as Maintainers"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 58,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "Tell me the closest cafe(s) to CMU Hunt library",
|
|
"strategies": [
|
|
"Access the Map Interface",
|
|
"Search for Cafes Near Hunt Library",
|
|
"Review and Select Cafes from Results",
|
|
"Inspect Specific Cafe Details"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process",
|
|
"View/Inspect Item Details"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US4",
|
|
"US5"
|
|
],
|
|
"reference_answer_raw_annotation": "De Fer Coffee & Tea"
|
|
},
|
|
{
|
|
"task_id": 582,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Create a new forum named Cyberpunk, with a description of Welcome to the future, and include ['Games', 'Books', 'Movies', 'Future'] in the sidebar?",
|
|
"strategies": [
|
|
"Navigate to Forum Creation Page",
|
|
"Fill Basic Forum Details",
|
|
"Configure Sidebar Content",
|
|
"Submit Forum Creation"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Configure Parameters/Settings",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3",
|
|
"US3",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 593,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Create a milestone for the upcoming task of merging all branches to main starting on March 15, 2044 and ending on March 30, 2044",
|
|
"strategies": [
|
|
"Navigate to Milestones Section",
|
|
"Initiate New Milestone Creation",
|
|
"Define Milestone Details",
|
|
"Finalize Milestone Creation"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process",
|
|
"Configure Parameters/Settings",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4",
|
|
"US3",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 599,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Open the thread of a trending post on the forum \"machine learning\" and subscribe.",
|
|
"strategies": [
|
|
"Navigate to the Machine Learning Forum",
|
|
"Locate the Trending Post",
|
|
"Access the Thread and Subscribe"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 605,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Find a subreddit focused on topics related to gaming consoles, and post my question, \"what is the recommended console to buy these days\" there",
|
|
"strategies": [
|
|
"Navigate to the Consoles Forum",
|
|
"Create and Submit a Post in the Consoles Forum"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 612,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Post a review of my recent reading \"big little lies\" in the r/books with my comment \"can't stop it\".",
|
|
"strategies": [
|
|
"Navigate to the r/books Forum",
|
|
"Initiate a New Submission",
|
|
"Compose the Post",
|
|
"Submit the Post"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process",
|
|
"Configure Parameters/Settings",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4",
|
|
"US3",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 619,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Re-post the image of costume contest in this page to funny subreddit and note \"from /f/pics\"",
|
|
"strategies": [
|
|
"Access the Existing Image Post",
|
|
"Initiate a New Submission to the Funny Subreddit",
|
|
"Fill in Submission Details",
|
|
"Submit the Post"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process",
|
|
"Configure Parameters/Settings",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4",
|
|
"US3",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 626,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Create a discussion post about \"Iphone 14\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"",
|
|
"strategies": [
|
|
"Navigate to the iPhone Forum's Submission Page",
|
|
"Create and Submit the Discussion Post"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 641,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Post a notice on a virtual meetup for Harry Potter enthusiasts on July 8th in the books subreddit",
|
|
"strategies": [
|
|
"Search and Navigate to the Books Subreddit",
|
|
"Access the Submission Form",
|
|
"Compose the Notice",
|
|
"Submit the Post"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US1",
|
|
"US3",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 645,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Post in books subreddit about what could machine learning help the corresponding field.",
|
|
"strategies": [
|
|
"Navigate to the Books Subreddit",
|
|
"Compose and Submit the Post"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 65,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Which customer has completed the fifth most number of orders in the entire history?",
|
|
"strategies": [
|
|
"Navigate to the Customers Page",
|
|
"Sort and Identify the Fifth Customer"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": "Jane Doe"
|
|
},
|
|
{
|
|
"task_id": 652,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Reply to the post with my comment \"???\"",
|
|
"strategies": [
|
|
"Activate the Comment Input Field",
|
|
"Input and Validate Comment Content",
|
|
"Submit the Comment"
|
|
],
|
|
"universal_strategies": [
|
|
"Execute Action/Process",
|
|
"Configure Parameters/Settings",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US4",
|
|
"US3",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 668,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Submit a merge request for a11yproject.com/redesign branch to be merged into master branch, assign Justin Armstrong as the reviewer",
|
|
"strategies": [
|
|
"Navigate to the Target Project",
|
|
"Initiate Merge Request Creation",
|
|
"Configure Source and Target Branches",
|
|
"Assign Reviewer",
|
|
"Submit Merge Request"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process",
|
|
"Configure Parameters/Settings",
|
|
"Configure Parameters/Settings",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4",
|
|
"US3",
|
|
"US3",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 678,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Lookup orders that are canceled",
|
|
"strategies": [
|
|
"Navigate to Orders Report",
|
|
"Filter by Canceled Status",
|
|
"Set Date Range",
|
|
"Generate Report"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Configure Parameters/Settings",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US3",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 682,
|
|
"sites": [
|
|
"reddit",
|
|
"gitlab"
|
|
],
|
|
"intent": "Find a GitLab repository related to chatGPT and make a Reddit post linking to it in a relevant subreddit",
|
|
"strategies": [
|
|
"Find GitLab Repository Related to ChatGPT",
|
|
"Navigate to Relevant Subreddit",
|
|
"Create Reddit Post Linking to the Repository"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data",
|
|
"Navigate To Page/Section",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2",
|
|
"US1",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 686,
|
|
"sites": [
|
|
"reddit",
|
|
"gitlab"
|
|
],
|
|
"intent": "Promote auth0/angular-storage to subreddit technology with the description from the repo itself.",
|
|
"strategies": [
|
|
"Locate the Angular-Storage Repository on GitLab",
|
|
"Navigate to the Forum Submission Page",
|
|
"Prepare Submission Content",
|
|
"Submit the Post"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US1",
|
|
"US3",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 69,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Among the top 10 post in \"books\" forum, is there any post talks about supporting local book stores? If so, tell me the organizations involved",
|
|
"strategies": [
|
|
"Navigate to the 'Books' Forum",
|
|
"Review Top 10 in 'Books' Forum"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"View/Inspect Item Details"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US5"
|
|
],
|
|
"reference_answer_raw_annotation": "bookshop.org"
|
|
},
|
|
{
|
|
"task_id": 693,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Draft an email to the shop owner via their contact us function for a coupon as my refund is supposed to be replaced by a coupon",
|
|
"strategies": [
|
|
"Navigate to the Contact Form",
|
|
"Fill in Personal Information",
|
|
"Compose the Message",
|
|
"Submit the Form"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Configure Parameters/Settings",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3",
|
|
"US3",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 695,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Add a simple product named Energy-Bulk Man Yoga Pant with 50 in stock, available in size 38 and color yellow, priced at $69.99",
|
|
"strategies": [
|
|
"Navigate to Product Creation Interface",
|
|
"Configure Product Attributes",
|
|
"Enter Product Details",
|
|
"Assign Categories and Visibility",
|
|
"Finalize Product Creation"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Configure Parameters/Settings",
|
|
"Configure Parameters/Settings",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3",
|
|
"US3",
|
|
"US3",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 7,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "Tell me the full address of all international airports that are within a driving distance of 50 km to Carnegie Mellon University",
|
|
"strategies": [
|
|
"Locate Pittsburgh International Airport via OpenStreetMap",
|
|
"Verify driving distance from CMU",
|
|
"Explore alternative airports via Wikipedia"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Analyze/Evaluate/Verify Data",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US7",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "Pittsburgh International Airport People Movers, Airport Boulevard, Findlay Township, Allegheny County, Pennsylvania, 15231, United States"
|
|
},
|
|
{
|
|
"task_id": 703,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Draft a new marketing price rule for Thanks giving sale that offers $40 discount on checkout for all customers",
|
|
"strategies": [
|
|
"Navigate to Catalog Price Rules",
|
|
"Create New Catalog Price Rule",
|
|
"Configure Rule Details",
|
|
"Finalize and Save Rule"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process",
|
|
"Configure Parameters/Settings",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4",
|
|
"US3",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 704,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Today is 3/15/2023, generate a sales order report for last month",
|
|
"strategies": [
|
|
"Navigate to the Orders Report Page",
|
|
"Configure Report Filters",
|
|
"Generate and Review the Report"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 71,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "What is the zip code of Chatham University?",
|
|
"strategies": [
|
|
"Search for Chatham University on OpenStreetMap"
|
|
],
|
|
"universal_strategies": [
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": "15232"
|
|
},
|
|
{
|
|
"task_id": 710,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Create a shipping report from 08/05/2022 to 03/01/2023",
|
|
"strategies": [
|
|
"Navigate to the Shipping Report Section",
|
|
"Configure and Generate the Shipping Report"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 714,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Thumbs down the top 1 post ever in gadgets.",
|
|
"strategies": [
|
|
"Navigate to the Gadgets Forum",
|
|
"Sort Submissions to Identify the Top Post of All Time",
|
|
"Downvote the Top Post"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 720,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Like all submissions created by CameronKelsey in subreddit earthporn",
|
|
"strategies": [
|
|
"Navigate to CameronKelsey's Submissions Page",
|
|
"Upvote All Submissions"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 729,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "DisLike all submissions created by AdamCannon in subreddit UpliftingNews",
|
|
"strategies": [
|
|
"Navigate to AdamCannon's Submissions",
|
|
"Dislike All Submissions"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 733,
|
|
"sites": [
|
|
"reddit"
|
|
],
|
|
"intent": "Edit my post on Star Trek Starfleet Academy series by adding a line to the body that says \"Every watch makes me feel like a kid again\"",
|
|
"strategies": [
|
|
"Navigate to User Profile to Access Submissions",
|
|
"Enter Edit Mode for the Target Post",
|
|
"Modify Post Body and Submit Changes"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US10",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 741,
|
|
"sites": [
|
|
"wikipedia",
|
|
"map"
|
|
],
|
|
"intent": "Show me the way from Carnegie Mellon University to the home stadium of Boston home NBA team",
|
|
"strategies": [
|
|
"Identify the Boston Celtics' Home Stadium via Wikipedia",
|
|
"Plan Route via OpenStreetMap"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 745,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Create a new public project \"awesome-llms\" and add primer, convexegg, abishek as members",
|
|
"strategies": [
|
|
"Create the \"awesome-llms\" Project",
|
|
"Add Members to the Project"
|
|
],
|
|
"universal_strategies": [
|
|
"Create Item",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US9",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 748,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Start a private project web_agent_android_xl with Android template and add primer, convexegg, abishek as members",
|
|
"strategies": [
|
|
"Create Private Project with Android Template",
|
|
"Add Project Members"
|
|
],
|
|
"universal_strategies": [
|
|
"Create Item",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US9",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 75,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "Given the following locations, ['Massachusetts Institute of Technology', 'Harvard University', 'Boston Logan International Airport'], what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.",
|
|
"strategies": [
|
|
"Set up directions from MIT to Harvard University",
|
|
"Calculate Harvard University to Boston Logan Airport route",
|
|
"Document and compare travel times"
|
|
],
|
|
"universal_strategies": [
|
|
"Configure Parameters/Settings",
|
|
"Execute Action/Process",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US3",
|
|
"US4",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "Massachusetts Institute of Technology, Harvard University, Boston Logan International Airport"
|
|
},
|
|
{
|
|
"task_id": 760,
|
|
"sites": [
|
|
"map",
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Show me the route and driving time from Allentown, PA to the city where my E-commerce customer Amanda Kim lives",
|
|
"strategies": [
|
|
"Log into Magento Admin Panel",
|
|
"Locate Amanda Kim's Customer Details",
|
|
"Retrieve Amanda's City and State",
|
|
"Calculate Route via OpenStreetMap"
|
|
],
|
|
"universal_strategies": [
|
|
"Execute Action/Process",
|
|
"Search/Filter/Sort Data",
|
|
"Extract/Retrieve Information",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US4",
|
|
"US2",
|
|
"US6",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 762,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "Get directions from Carnegie Music Hall in NYC to Carnegie Mellon University using driving options.",
|
|
"strategies": [
|
|
"Locate Starting Point (Carnegie Music Hall NYC)",
|
|
"Initiate Directions Feature",
|
|
"Set Starting Point and Destination",
|
|
"Execute Directions with Driving Options"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process",
|
|
"Configure Parameters/Settings",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4",
|
|
"US3",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 768,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "5 blue Cronus yoga pants with size 33 arrived, update the stock",
|
|
"strategies": [
|
|
"Navigate to the Product's Edit Page",
|
|
"Update Stock Quantity and Save Changes"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 77,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "What is the total count of Pending reviews amongst all the reviews?",
|
|
"strategies": [
|
|
"Navigate to the Product Reviews Report",
|
|
"Filter Reviews by Pending Status",
|
|
"Retrieve the Total Count"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "5"
|
|
},
|
|
{
|
|
"task_id": 771,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Approve the positive reviews to display in our store.",
|
|
"strategies": [
|
|
"Navigate to Pending Reviews Section",
|
|
"Approve a Specific Review",
|
|
"Repeat for Additional Reviews"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US10",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 773,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Delete all pending negative reviews",
|
|
"strategies": [
|
|
"Navigate to Pending Reviews Section",
|
|
"Select Target Negative Reviews",
|
|
"Execute Mass Deletion"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process",
|
|
"Delete Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4",
|
|
"US11"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 782,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Increase the price of all blue running tshirts in extra small and small sizes by 23%",
|
|
"strategies": [
|
|
"Navigate to Products Section",
|
|
"Search and Filter Products",
|
|
"Edit Configurable Products and Update Variants"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 787,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Tell me the number of followers of the contributor who has the most commits to branch main",
|
|
"strategies": [
|
|
"Navigate to the Main Branch Contributors Section",
|
|
"Identify the Top Contributor by Commit Count",
|
|
"Access Contributor's Profile for Followers Count"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Analyze/Evaluate/Verify Data",
|
|
"Analyze/Evaluate/Verify Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US7",
|
|
"US7"
|
|
],
|
|
"reference_answer_raw_annotation": "0"
|
|
},
|
|
{
|
|
"task_id": 791,
|
|
"sites": [
|
|
"gitlab",
|
|
"reddit"
|
|
],
|
|
"intent": "Make a folder named car on the gimmiethat.space repo and include a file called urls.txt that consists of the links to the 5 most recent posts from cars.",
|
|
"strategies": [
|
|
"Navigate to the gimmiethat.space Repository",
|
|
"Create the 'car' Folder and 'urls.txt' File",
|
|
"Collect Recent Car Post URLs from Forums",
|
|
"Input URLs and Commit the File"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Create Item",
|
|
"Extract/Retrieve Information",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US9",
|
|
"US6",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": "N/A"
|
|
},
|
|
{
|
|
"task_id": 798,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Change the delivery address for my most recent order to 77 Massachusetts Ave, Cambridge, MA.",
|
|
"strategies": [
|
|
"Access Address Editing Interface",
|
|
"Update Address Details"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": "N/A"
|
|
},
|
|
{
|
|
"task_id": 809,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Create an issue in a11yproject repo with title \"404 for many URLs\". Assign the issue to myself. Set due date to be 2030-1-3",
|
|
"strategies": [
|
|
"Navigate to the a11yproject.com Project",
|
|
"Initiate New Issue Creation",
|
|
"Configure Issue Details",
|
|
"Submit the Issue"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process",
|
|
"Configure Parameters/Settings",
|
|
"Create Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4",
|
|
"US3",
|
|
"US9"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 811,
|
|
"sites": [
|
|
"gitlab"
|
|
],
|
|
"intent": "Assign the issue regarding 404 in a11yproject to myself.",
|
|
"strategies": [
|
|
"Navigate to the 404 Issue in a11yproject.com",
|
|
"Authenticate to Modify the Issue",
|
|
"Assign the Issue to Self"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Execute Action/Process",
|
|
"Update/Modify Item"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US4",
|
|
"US10"
|
|
],
|
|
"reference_answer_raw_annotation": ""
|
|
},
|
|
{
|
|
"task_id": 82,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "What is the duration required to first walk from Massachusetts Institute of Technology to Harvard University, and then drive to Boston Logan International Airport?",
|
|
"strategies": [
|
|
"Set Massachusetts Institute of Technology (MIT) as Starting Point",
|
|
"Generate Walking Route to Harvard University",
|
|
"Calculate Driving Route to Boston Logan International Airport"
|
|
],
|
|
"universal_strategies": [
|
|
"Configure Parameters/Settings",
|
|
"Execute Action/Process",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US3",
|
|
"US4",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "63 min"
|
|
},
|
|
{
|
|
"task_id": 88,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "From my stay at Homewood Suites Southpointe, what's the estimated driving time to reach PPG Paints Arena?",
|
|
"strategies": [
|
|
"Access Directions Interface",
|
|
"Set Starting Location",
|
|
"Set Destination Location"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Configure Parameters/Settings"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3",
|
|
"US3"
|
|
],
|
|
"reference_answer_raw_annotation": "34 minutes"
|
|
},
|
|
{
|
|
"task_id": 93,
|
|
"sites": [
|
|
"map"
|
|
],
|
|
"intent": "Which US states border New Hampshire?",
|
|
"strategies": [
|
|
"Close the Welcome Modal and Access Map Interface",
|
|
"Search for New Hampshire and Center the Map"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Search/Filter/Sort Data"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US2"
|
|
],
|
|
"reference_answer_raw_annotation": "Massachusetts, Vermont, Maine"
|
|
},
|
|
{
|
|
"task_id": 95,
|
|
"sites": [
|
|
"shopping_admin"
|
|
],
|
|
"intent": "Tell me the grand total of invoice 000000002.",
|
|
"strategies": [
|
|
"Navigate to the Orders Page",
|
|
"Access Order 000000002 Details",
|
|
"Retrieve Grand Total Value"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Navigate To Page/Section",
|
|
"Extract/Retrieve Information"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US1",
|
|
"US6"
|
|
],
|
|
"reference_answer_raw_annotation": "$39.64"
|
|
},
|
|
{
|
|
"task_id": 96,
|
|
"sites": [
|
|
"shopping"
|
|
],
|
|
"intent": "Tell me the status of my latest order and when will it arrive",
|
|
"strategies": [
|
|
"Access Order History and Locate Latest Order",
|
|
"Review Order Status and Details"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"View/Inspect Item Details"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US5"
|
|
],
|
|
"reference_answer_raw_annotation": "The last order was canceled. It will never arrive."
|
|
},
|
|
{
|
|
"task_id": 97,
|
|
"sites": [
|
|
"map",
|
|
"wikipedia"
|
|
],
|
|
"intent": "Tell me the distance to drive from Carnegie Mellon University to the top computer science school in massachusetts",
|
|
"strategies": [
|
|
"Access Directions Feature on OpenStreetMap",
|
|
"Specify Starting Point (Carnegie Mellon University)",
|
|
"Specify Destination (Top CS School in Massachusetts)",
|
|
"Calculate and Display Distance"
|
|
],
|
|
"universal_strategies": [
|
|
"Navigate To Page/Section",
|
|
"Configure Parameters/Settings",
|
|
"Configure Parameters/Settings",
|
|
"Execute Action/Process"
|
|
],
|
|
"universal_strategy_ids": [
|
|
"US1",
|
|
"US3",
|
|
"US3",
|
|
"US4"
|
|
],
|
|
"reference_answer_raw_annotation": "914 km"
|
|
}
|
|
] |