5838 lines
199 KiB
JSON
5838 lines
199 KiB
JSON
[
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 0,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "What are the top-{{n}} best-selling product in {{period}}",
|
||
"instantiation_dict": {
|
||
"n": 3,
|
||
"period": "Jan 2023"
|
||
},
|
||
"intent": "What are the top-3 best-selling product in Jan 2023",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"Impulse Duffle",
|
||
"Overnight Duffle",
|
||
"Hawkeye Yoga Short-32-Blue"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Impulse Duffle, Overnight Duffle, Hawkeye Yoga Short-32-Blue"
|
||
},
|
||
"intent_template_id": 279,
|
||
"old_task_id": 4
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 1,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}",
|
||
"instantiation_dict": {
|
||
"airport_type": "international airports",
|
||
"start": "Carnegie Mellon University",
|
||
"radius": "50 km"
|
||
},
|
||
"intent": "Tell me the full address of all international airports that are within a driving distance of 50 km to Carnegie Mellon University",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"Pittsburgh International Airport, Southern Beltway, Findlay Township, Allegheny County, 15231, United States"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Pittsburgh International Airport People Movers, Airport Boulevard, Findlay Township, Allegheny County, Pennsylvania, 15231, United States"
|
||
},
|
||
"intent_template_id": 79,
|
||
"old_task_id": 7
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 2,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"",
|
||
"instantiation_dict": {
|
||
"term": "best"
|
||
},
|
||
"intent": "Tell me the the number of reviews that our store received by far that mention term \"best\"",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"2"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "2"
|
||
},
|
||
"intent_template_id": 288,
|
||
"old_task_id": 15
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 3,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Compare the difference in time for walking and driving route from {{start}} to {{end}}",
|
||
"instantiation_dict": {
|
||
"start": "Randyland",
|
||
"end": "Carnegie Mellon University"
|
||
},
|
||
"intent": "Compare the difference in time for walking and driving route from Randyland to Carnegie Mellon University",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"driving: 13min",
|
||
"walking: 1h 45min"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "driving: 13min, walking: 1h 45min."
|
||
},
|
||
"intent_template_id": 73,
|
||
"old_task_id": 20
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 4,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__/3-pack-samsung-galaxy-s6-screen-protector-nearpow-tempered-glass-screen-protector-with-9h-hardness-crystal-clear-easy-bubble-free-installation-scratch-resist.html",
|
||
"geolocation": null,
|
||
"intent_template": "List out reviewers, if exist, who mention about {{description}}",
|
||
"instantiation_dict": {
|
||
"description": "good fingerprint resistant"
|
||
},
|
||
"intent": "List out reviewers, if exist, who mention about good fingerprint resistant",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"Rachel",
|
||
"T. Gannon"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Rachel, T. Gannon, "
|
||
},
|
||
"intent_template_id": 222,
|
||
"old_task_id": 23
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 5,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.",
|
||
"instantiation_dict": {
|
||
"forum": "Showerthoughts"
|
||
},
|
||
"intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the Showerthoughts forum.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"0"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "0"
|
||
},
|
||
"intent_template_id": 33,
|
||
"old_task_id": 27
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 6,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "I will arrive {{place}} soon. Provide the name of a {{target1}} in the vicinity, if available. Then, tell me the {{information}} to {{target2}} from the hotel.",
|
||
"instantiation_dict": {
|
||
"place": "Pittsburgh Airport",
|
||
"target1": "Hilton hotel",
|
||
"information": "the shortest walking distance",
|
||
"target2": "a supermarket"
|
||
},
|
||
"intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hilton hotel in the vicinity, if available. Then, tell me the the shortest walking distance to a supermarket from the hotel.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"DoubleTree by Hilton Hotel Pittsburgh Airport",
|
||
"1.4km"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "DoubleTree by Hilton Hotel Pittsburgh Airport Distance: 1.4km"
|
||
},
|
||
"intent_template_id": 78,
|
||
"old_task_id": 33
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 7,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}",
|
||
"instantiation_dict": {
|
||
"place": "police station",
|
||
"location": "gates building at CMU"
|
||
},
|
||
"intent": "Check if the police station in pittsburgh can be reached in one hour by car from gates building at CMU",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"Yes"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Yes"
|
||
},
|
||
"intent_template_id": 77,
|
||
"old_task_id": 37
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 8,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "List the top {{n}} search terms in my store",
|
||
"instantiation_dict": {
|
||
"n": "3"
|
||
},
|
||
"intent": "List the top 3 search terms in my store",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"hollister",
|
||
"Joust Bag",
|
||
"nike"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "hollister, Joust Bag, nike"
|
||
},
|
||
"intent_template_id": 285,
|
||
"old_task_id": 43
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 9,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Check out my todos",
|
||
"instantiation_dict": {},
|
||
"intent": "Check out my todos",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__GITLAB__/dashboard/todos",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 303,
|
||
"old_task_id": 44
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 10,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.",
|
||
"instantiation_dict": {
|
||
"period": "over the past three days"
|
||
},
|
||
"intent": "Today is 6/12/2023. Tell me how many fulfilled orders I have over the past three days, and the total amount of money I spent.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"0 order",
|
||
"$0 total spend"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "0 order, $0 total spend"
|
||
},
|
||
"intent_template_id": 197,
|
||
"old_task_id": 48
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 11,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "How long does it take to walk from {{start}} to {{end}}?",
|
||
"instantiation_dict": {
|
||
"start": "Carnegie Museum of Art",
|
||
"end": "a library at CMU"
|
||
},
|
||
"intent": "How long does it take to walk from Carnegie Museum of Art to a library at CMU?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"11 min"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "11 min"
|
||
},
|
||
"intent_template_id": 68,
|
||
"old_task_id": 56
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 12,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Tell me the closest {{place1}}(s) to {{place2}}",
|
||
"instantiation_dict": {
|
||
"place1": "cafe",
|
||
"place2": "CMU Hunt library"
|
||
},
|
||
"intent": "Tell me the closest cafe(s) to CMU Hunt library",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"De Fer Coffee & Tea"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "De Fer Coffee & Tea"
|
||
},
|
||
"intent_template_id": 69,
|
||
"old_task_id": 58
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 13,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Which customer has completed the {{quantifier}} number of orders in the entire history?",
|
||
"instantiation_dict": {
|
||
"quantifier": "fifth most"
|
||
},
|
||
"intent": "Which customer has completed the fifth most number of orders in the entire history?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"Matt Baker"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Matt Baker"
|
||
},
|
||
"intent_template_id": 276,
|
||
"old_task_id": 65
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 14,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}",
|
||
"instantiation_dict": {
|
||
"number": 10,
|
||
"subreddit": "books",
|
||
"description": "is there any post talks about supporting local book stores? If so, tell me the organizations involved"
|
||
},
|
||
"intent": "Among the top 10 post in \"books\" forum, is there any post talks about supporting local book stores? If so, tell me the organizations involved",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"bookshop.org"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "bookshop.org"
|
||
},
|
||
"intent_template_id": 17,
|
||
"old_task_id": 69
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 15,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "What is the zip code of {{place}}?",
|
||
"instantiation_dict": {
|
||
"place": "Chatham University"
|
||
},
|
||
"intent": "What is the zip code of Chatham University?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"15232"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "15232"
|
||
},
|
||
"intent_template_id": 70,
|
||
"old_task_id": 71
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 16,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Given the following locations, {{place_list}}, what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.",
|
||
"instantiation_dict": {
|
||
"place_list": [
|
||
"Massachusetts Institute of Technology",
|
||
"Harvard University",
|
||
"Boston Logan International Airport"
|
||
]
|
||
},
|
||
"intent": "Given the following locations, ['Massachusetts Institute of Technology', 'Harvard University', 'Boston Logan International Airport'], what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"The order is Massachusetts Institute of Technology, Harvard University, Boston Logan International Airport"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Massachusetts Institute of Technology, Harvard University, Boston Logan International Airport"
|
||
},
|
||
"intent_template_id": 65,
|
||
"old_task_id": 75
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 17,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "What is the total count of {{status}} reviews amongst all the reviews?",
|
||
"instantiation_dict": {
|
||
"status": "Pending"
|
||
},
|
||
"intent": "What is the total count of Pending reviews amongst all the reviews?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"5"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "5"
|
||
},
|
||
"intent_template_id": 277,
|
||
"old_task_id": 77
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 18,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?",
|
||
"instantiation_dict": {
|
||
"place_A": "Massachusetts Institute of Technology",
|
||
"place_B": "Harvard University",
|
||
"place_C": "Boston Logan International Airport"
|
||
},
|
||
"intent": "What is the duration required to first walk from Massachusetts Institute of Technology to Harvard University, and then drive to Boston Logan International Airport?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"64 min"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "63 min"
|
||
},
|
||
"intent_template_id": 72,
|
||
"old_task_id": 82
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 19,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?",
|
||
"instantiation_dict": {
|
||
"hotel": "Homewood Suites Southpointe",
|
||
"place": "PPG Paints Arena"
|
||
},
|
||
"intent": "From my stay at Homewood Suites Southpointe, what's the estimated driving time to reach PPG Paints Arena?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"34 minutes"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "34 minutes"
|
||
},
|
||
"intent_template_id": 64,
|
||
"old_task_id": 88
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 20,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Which US states border {{state}}?",
|
||
"instantiation_dict": {
|
||
"state": "New Hampshire"
|
||
},
|
||
"intent": "Which US states border New Hampshire?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"Massachusetts",
|
||
"Vermont",
|
||
"Maine"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Massachusetts, Vermont, Maine"
|
||
},
|
||
"intent_template_id": 67,
|
||
"old_task_id": 93
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 21,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Telll me the grand total of invoice {{id}}.",
|
||
"instantiation_dict": {
|
||
"id": "000000002"
|
||
},
|
||
"intent": "Telll me the grand total of invoice 000000002.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"39.64"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "$39.64"
|
||
},
|
||
"intent_template_id": 274,
|
||
"old_task_id": 95
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 22,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Tell me the status of my latest order and when will it arrive",
|
||
"instantiation_dict": {},
|
||
"intent": "Tell me the status of my latest order and when will it arrive",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"The last order was canceled. It will never arrive."
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"reference_answer_raw_annotation": "The last order was canceled. It will never arrive.",
|
||
"string_note": ""
|
||
},
|
||
"intent_template_id": 193,
|
||
"old_task_id": 96
|
||
},
|
||
{
|
||
"sites": [
|
||
"map",
|
||
"wikipedia"
|
||
],
|
||
"task_id": 23,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Tell me the distance to drive from Carnegie Mellon University to the top computer science school in massachusetts",
|
||
"instantiation_dict": {},
|
||
"intent": "Tell me the distance to drive from Carnegie Mellon University to the top computer science school in massachusetts",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"914km"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "914 km"
|
||
},
|
||
"intent_template_id": 120,
|
||
"old_task_id": 97
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 24,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Where is the nearest {{places}} to {{start}}, and what is the walking distance to it?",
|
||
"instantiation_dict": {
|
||
"places": "tea cafe",
|
||
"start": "University of Pittsburgh"
|
||
},
|
||
"intent": "Where is the nearest tea cafe to University of Pittsburgh, and what is the walking distance to it?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"Fuku Tea",
|
||
"3716",
|
||
"Forbes Avenue",
|
||
"Central Oakland",
|
||
"Pittsburgh",
|
||
"653m"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Fuku Tea, 3716, Forbes Avenue, Oakland, Central Oakland, Pittsburgh, Allegheny County, Pennsylvania, 15213, United States\n653m"
|
||
},
|
||
"intent_template_id": 66,
|
||
"old_task_id": 98
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 25,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}",
|
||
"instantiation_dict": {
|
||
"label": "questions",
|
||
"repo": "kkroening/ffmpeg-python"
|
||
},
|
||
"intent": "Display the list of issues in the kkroening/ffmpeg-python repository that have labels related to questions",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__GITLAB__/kkroening/ffmpeg-python/-/issues/?sort=created_date&state=opened&label_name%5B%5D=question&first_page_size=20",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 349,
|
||
"old_task_id": 103
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 26,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format",
|
||
"instantiation_dict": {
|
||
"period": "from Jan to December 2022"
|
||
},
|
||
"intent": "Presents the monthly count of successful orders from Jan to December 2022 in MM:COUNT format",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"01:11",
|
||
"02:16",
|
||
"03:14",
|
||
"04:8",
|
||
"05:8",
|
||
"06:13",
|
||
"07:9",
|
||
"08:8",
|
||
"09:10",
|
||
"10:4",
|
||
"11:5",
|
||
"12:10"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "01:11 02:16 03:14 04:8 05:8 06:13 07:9 08:8 09:10 10:4 11:5 12:10"
|
||
},
|
||
"intent_template_id": 270,
|
||
"old_task_id": 109
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 27,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Show me the name of the customers who have expressed dissatisfaction with {{product}}",
|
||
"instantiation_dict": {
|
||
"product": "Chloe tank"
|
||
},
|
||
"intent": "Show me the name of the customers who have expressed dissatisfaction with Chloe tank",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"exact_match": "N/A"
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "There is no negative review for Chloe tank",
|
||
"reference_answer_raw_annotation": ""
|
||
},
|
||
"intent_template_id": 245,
|
||
"old_task_id": 115
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 28,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "What is the date when I made my first purchase on this site?",
|
||
"instantiation_dict": {},
|
||
"intent": "What is the date when I made my first purchase on this site?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"3/2/22"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "3/2/22"
|
||
},
|
||
"intent_template_id": 161,
|
||
"old_task_id": 117
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 29,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "I have jaw bruxism problem, show me something that could alleviate the problem.",
|
||
"instantiation_dict": {},
|
||
"intent": "I have jaw bruxism problem, show me something that could alleviate the problem.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": null,
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"jaw bruxism",
|
||
"mouth guard"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 151,
|
||
"old_task_id": 118
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 30,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Tell me the reasons why customers like {{product}}",
|
||
"instantiation_dict": {
|
||
"product": "Circe's products"
|
||
},
|
||
"intent": "Tell me the reasons why customers like Circe's products",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"Warm and comfortable. True to size."
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Warm and comfortable. True to size."
|
||
},
|
||
"intent_template_id": 250,
|
||
"old_task_id": 123
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 31,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "What is the price range of {{product}} in the One Stop Market?",
|
||
"instantiation_dict": {
|
||
"product": "teeth grinding mouth guard"
|
||
},
|
||
"intent": "What is the price range of teeth grinding mouth guard in the One Stop Market?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"1.46",
|
||
"85"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "$1.46 - $85"
|
||
},
|
||
"intent_template_id": 159,
|
||
"old_task_id": 125
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 32,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "What brands appear most frequently among the top search terms?",
|
||
"instantiation_dict": {},
|
||
"intent": "What brands appear most frequently among the top search terms?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"Hollister"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Hollister"
|
||
},
|
||
"intent_template_id": 1001,
|
||
"old_task_id": 127
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 33,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "What's the total number of items sold in the most recent {{k}} orders?",
|
||
"instantiation_dict": {
|
||
"k": "7"
|
||
},
|
||
"intent": "What's the total number of items sold in the most recent 7 orders?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"25"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "25"
|
||
},
|
||
"intent_template_id": 1002,
|
||
"old_task_id": 131
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 34,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?",
|
||
"instantiation_dict": {
|
||
"user": "Eric and Kilian",
|
||
"repo": "a11yproject",
|
||
"date": "1/3/2023"
|
||
},
|
||
"intent": "How many commits did Eric and Kilian make to a11yproject on 1/3/2023?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"0"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "0"
|
||
},
|
||
"intent_template_id": 322,
|
||
"old_task_id": 135
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 35,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?",
|
||
"instantiation_dict": {
|
||
"city1": "the hometown of Joe Biden",
|
||
"city2": "Bridgeport"
|
||
},
|
||
"intent": "What is the estimated driving time between the hometown of Joe Biden and Bridgeport?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"3h 20min"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "3h 20min"
|
||
},
|
||
"intent_template_id": 51,
|
||
"old_task_id": 139
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 36,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "How much I spent on {{category}} shopping during {{time}}",
|
||
"instantiation_dict": {
|
||
"category": "food",
|
||
"time": "from mid Jan to the end Jan 2023"
|
||
},
|
||
"intent": "How much I spent on food shopping during from mid Jan to the end Jan 2023",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"0"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "0"
|
||
},
|
||
"intent_template_id": 162,
|
||
"old_task_id": 144
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 37,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}",
|
||
"instantiation_dict": {
|
||
"option": "color",
|
||
"product": "artifical plants",
|
||
"time": "Feb 2023"
|
||
},
|
||
"intent": "What is the color configuration of the artifical plants I bought Feb 2023",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"Green-vines"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Green-vines"
|
||
},
|
||
"intent_template_id": 155,
|
||
"old_task_id": 149
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 38,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?",
|
||
"instantiation_dict": {
|
||
"location1": "Animal Rescue League of Pittsburgh",
|
||
"location2": "Schenley park"
|
||
},
|
||
"intent": "What is the minimum travel time by car from Animal Rescue League of Pittsburgh to Schenley park?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"9min"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "9min"
|
||
},
|
||
"intent_template_id": 36,
|
||
"old_task_id": 155
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 39,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Checkout merge requests assigned to me",
|
||
"instantiation_dict": {},
|
||
"intent": "Checkout merge requests assigned to me",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__GITLAB__/dashboard/merge_requests?assignee_username=byteblaze",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 290,
|
||
"old_task_id": 156
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 40,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Show all customers",
|
||
"instantiation_dict": {},
|
||
"intent": "Show all customers",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING_ADMIN__/customer/index/",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 255,
|
||
"old_task_id": 157
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 41,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards",
|
||
"instantiation_dict": {
|
||
"num": 40
|
||
},
|
||
"intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 40 cards",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING__/game-card-holder-storage-case-for-nintendo-switch-games-or-ps-vita-game-case-or-sd-memory-cards-black.html",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 171,
|
||
"old_task_id": 162
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 42,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__/photosmart-plus-b209-clr-inkjetfb-p-s-c-usb-wrls-1.html",
|
||
"geolocation": null,
|
||
"intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.",
|
||
"instantiation_dict": {},
|
||
"intent": "What are the main criticisms of this product? Please extract the relevant sentences.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"The wireless connection works on a whim (about 40% of the time I've owned it)",
|
||
"It seems to constantly run out of ink",
|
||
"Cartridge prices are less than some printers I've had",
|
||
"This printer seems to have more reasons NOT to work (none that are findable or correctable) Ex: error boxes saying that it's out of paper when it automatically switches to photo printing for some reason",
|
||
"Scanner is as slow as my first scanner I ever owned in the mid-90's",
|
||
"For the $176 I paid, there isn't even a fax component on it. I guess the \"PLUS\" part of it's name is in reference to the migraines it causes when you can't figure out the new reason why it's not working for the 10th time in the past 2 months."
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "The wireless connection works on a whim (about 40% of the time I've owned it). It seems to constantly run out of ink. Cartridge prices are less than some printers I've had, but now I understand why. This printer seems to have more reasons NOT to work (none that are findable or correctable) Ex: error boxes saying that it's out of paper when it automatically switches to photo printing for some reason. Scanner is as slow as my first scanner I ever owned in the mid-90's. For the $176 I paid, there isn't even a fax component on it. I guess the \"PLUS\" part of it's name is in reference to the migraines it causes when you can't figure out the new reason why it's not working for the 10th time in the past 2 months."
|
||
},
|
||
"intent_template_id": 136,
|
||
"old_task_id": 167
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 43,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?",
|
||
"instantiation_dict": {
|
||
"description": "the most"
|
||
},
|
||
"intent": "Tell me the full names of the repositories where I made contributions and they got the most stars?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"a11yproject.com",
|
||
"design"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "a11yproject.com, Primer/design"
|
||
},
|
||
"intent_template_id": 289,
|
||
"old_task_id": 169
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 44,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed",
|
||
"instantiation_dict": {
|
||
"keyword": "better"
|
||
},
|
||
"intent": "Open my latest updated issue that has keyword \"better\" in its title to check if it is closed",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match",
|
||
"url_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"No, it is open"
|
||
]
|
||
},
|
||
"reference_url": "__GITLAB__/byteblaze/empathy-prompts/-/issues/8",
|
||
"program_html": [],
|
||
"reference_answer_raw_annotation": "Not closed",
|
||
"string_note": "",
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 310,
|
||
"old_task_id": 173
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 45,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed",
|
||
"instantiation_dict": {
|
||
"keyword": "homepage content"
|
||
},
|
||
"intent": "Open my latest created issue that has homepage content in its title to check if it is closed",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match",
|
||
"url_match"
|
||
],
|
||
"reference_answers": {
|
||
"exact_match": "Yes"
|
||
},
|
||
"reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues/719",
|
||
"program_html": [],
|
||
"reference_answer_raw_annotation": "closed",
|
||
"string_note": ""
|
||
},
|
||
"intent_template_id": 500,
|
||
"old_task_id": 182
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 46,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Tell me the total cost of my latest {{status}} order?",
|
||
"instantiation_dict": {
|
||
"status": "complete"
|
||
},
|
||
"intent": "Tell me the total cost of my latest complete order?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"65.32"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "65.32"
|
||
},
|
||
"intent_template_id": 214,
|
||
"old_task_id": 190
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 47,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Compare the payment difference of the last {{N}} {{status_1}} orders and {{status_2}} orders",
|
||
"instantiation_dict": {
|
||
"status_1": "cancelled",
|
||
"status_2": "completed",
|
||
"N": "4"
|
||
},
|
||
"intent": "Compare the payment difference of the last 4 cancelled orders and completed orders",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"194.25"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "194.25"
|
||
},
|
||
"intent_template_id": 367,
|
||
"old_task_id": 196
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 48,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Get the {{attribute}} of the {{status}} order",
|
||
"instantiation_dict": {
|
||
"attribute": "date",
|
||
"status": "most recent canlled"
|
||
},
|
||
"intent": "Get the date of the most recent canlled order",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"May 23 2023"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "May 23, 2023"
|
||
},
|
||
"intent_template_id": 366,
|
||
"old_task_id": 202
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 49,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__/a11yproject/a11yproject.com",
|
||
"geolocation": null,
|
||
"intent_template": "How many commits did {{user}} make on {{date}}?",
|
||
"instantiation_dict": {
|
||
"user": "kilian",
|
||
"date": "3/5/2023"
|
||
},
|
||
"intent": "How many commits did kilian make on 3/5/2023?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"1"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "1"
|
||
},
|
||
"intent_template_id": 320,
|
||
"old_task_id": 205
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 50,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Find the customer name and email with phone number {{PhoneNum}}",
|
||
"instantiation_dict": {
|
||
"PhoneNum": "8015551212"
|
||
},
|
||
"intent": "Find the customer name and email with phone number 8015551212",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"Sean Miller",
|
||
"sean.miller@gmail.com"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Sean Miller, sean.miller@gmail.com"
|
||
},
|
||
"intent_template_id": 364,
|
||
"old_task_id": 211
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 51,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "What are the key aspects that the customers don't like about {{product}}",
|
||
"instantiation_dict": {
|
||
"product": "Circe ice fleece"
|
||
},
|
||
"intent": "What are the key aspects that the customers don't like about Circe ice fleece",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"exact_match": "N/A"
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "N/A"
|
||
},
|
||
"intent_template_id": 249,
|
||
"old_task_id": 215
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 52,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Show me the walking distance from nearby hotels to {{location}} that take at most {{n}} minutes?",
|
||
"instantiation_dict": {
|
||
"location": "Gardner Steel Conference Center,",
|
||
"n": 5
|
||
},
|
||
"intent": "Show me the walking distance from nearby hotels to Gardner Steel Conference Center, that take at most 5 minutes?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"Wyndham Pittsburgh University Cente: 375m",
|
||
"The Oaklander Hotel: 338m"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Wyndham Pittsburgh University Cente: 375 m\nThe Oaklander Hotel: 338 m"
|
||
},
|
||
"intent_template_id": 41,
|
||
"old_task_id": 220
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 53,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "I am at CMU Pittsburgh, how long it takes to the nearest {{location}} with different transportation methods?",
|
||
"instantiation_dict": {
|
||
"location": "USPS postal office"
|
||
},
|
||
"intent": "I am at CMU Pittsburgh, how long it takes to the nearest USPS postal office with different transportation methods?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"Walk: 1 minute",
|
||
"Drive: less than 1 minute",
|
||
"Bike: less than 1 minute"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Walk: 1 minute to walk and\nDrive: less than 1 minute\nBike: less than 1 minute"
|
||
},
|
||
"intent_template_id": 35,
|
||
"old_task_id": 221
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 54,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "What do customers say about {{product_type}} from {{manufature}}",
|
||
"instantiation_dict": {
|
||
"product_type": "brush",
|
||
"manufature": "sephora"
|
||
},
|
||
"intent": "What do customers say about brush from sephora",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"No reviews available for the sephora brushes."
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "The sephora brushes don't have reviews",
|
||
"reference_answer_raw_annotation": "N/A"
|
||
},
|
||
"intent_template_id": 135,
|
||
"old_task_id": 225
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 55,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "What is the price range for products from {{brand}}?",
|
||
"instantiation_dict": {
|
||
"brand": "EYZUTAK"
|
||
},
|
||
"intent": "What is the price range for products from EYZUTAK?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"9.99"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "$9.99"
|
||
},
|
||
"intent_template_id": 370,
|
||
"old_task_id": 227
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 56,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Get the order number of my most recent {{status}} order ",
|
||
"instantiation_dict": {
|
||
"status": "under delivery"
|
||
},
|
||
"intent": "Get the order number of my most recent under delivery order ",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"N/A"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "There is no under delivery order"
|
||
},
|
||
"intent_template_id": 213,
|
||
"old_task_id": 235
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 57,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Where is the nearest {{location}} from {{location2}} {{condition}}",
|
||
"instantiation_dict": {
|
||
"location": "pharmacy",
|
||
"location2": "Carnegie Mellon",
|
||
"condition": "I can walk within 20mins"
|
||
},
|
||
"intent": "Where is the nearest pharmacy from Carnegie Mellon I can walk within 20mins",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"Schiller's Pharmacy",
|
||
"811",
|
||
"South Aiken Avenue",
|
||
"Shadyside",
|
||
"Pittsburgh"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Schiller's Pharmacy, 811, South Aiken Avenue, Shadyside, Pittsburgh, Allegheny County, 15232, United States"
|
||
},
|
||
"intent_template_id": 39,
|
||
"old_task_id": 236
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 58,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category",
|
||
"instantiation_dict": {
|
||
"product_category": "competative swimwear"
|
||
},
|
||
"intent": "I am doing a market survey for one stop market, show me the most expensive product from competative swimwear category",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING__/clothing-shoes-jewelry/sport-specific-clothing/competitive-swimwear.html?product_list_order=price&product_list_dir=desc",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 138,
|
||
"old_task_id": 240
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 59,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}",
|
||
"instantiation_dict": {
|
||
"information": "email address",
|
||
"product": "the style of Zoe products"
|
||
},
|
||
"intent": "Show me the email address of the customer who is the most unhappy with the style of Zoe products",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"exact_match": "N/A"
|
||
},
|
||
"reference_url": "Valorie doesn't have a email in the system",
|
||
"program_html": [],
|
||
"string_note": "There is no negative review for Zoe products",
|
||
"reference_answer_raw_annotation": "N/A"
|
||
},
|
||
"intent_template_id": 244,
|
||
"old_task_id": 247
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 60,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Tell me the coordinates of {{location}} in DD format",
|
||
"instantiation_dict": {
|
||
"location": "Apple Store near Pitt"
|
||
},
|
||
"intent": "Tell me the coordinates of Apple Store near Pitt in DD format",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"40.451",
|
||
"-79.933"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "40.4511693, -79.9334241"
|
||
},
|
||
"intent_template_id": 46,
|
||
"old_task_id": 250
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 61,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "What is the {{information}} of {{location}}",
|
||
"instantiation_dict": {
|
||
"location": "Western Pennsylvania Hospital",
|
||
"information": "phone number"
|
||
},
|
||
"intent": "What is the phone number of Western Pennsylvania Hospital",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"+1 412 578 5000"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "4125785000"
|
||
},
|
||
"intent_template_id": 501,
|
||
"old_task_id": 254
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 62,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "See all public projects",
|
||
"instantiation_dict": {},
|
||
"intent": "See all public projects",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__GITLAB__/explore",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 325,
|
||
"old_task_id": 258
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 63,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Get me my RSS feed token",
|
||
"instantiation_dict": {},
|
||
"intent": "Get me my RSS feed token",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"TMN_bBn9Z48qVbUFZV45"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "TMN_bBn9Z48qVbUFZV45"
|
||
},
|
||
"intent_template_id": 312,
|
||
"old_task_id": 259
|
||
},
|
||
{
|
||
"sites": [
|
||
"wikipedia",
|
||
"map"
|
||
],
|
||
"task_id": 64,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "What's the closest national park to {{city}}? How long does it take to bike there?",
|
||
"instantiation_dict": {
|
||
"city": "Vinalhaven, ME"
|
||
},
|
||
"intent": "What's the closest national park to Vinalhaven, ME? How long does it take to bike there?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"Acadia National Park"
|
||
],
|
||
"fuzzy_match": [
|
||
"10h 33min"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Acadia National Park\n10h 33min"
|
||
},
|
||
"intent_template_id": 85,
|
||
"old_task_id": 268
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 65,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category",
|
||
"instantiation_dict": {
|
||
"price": "30",
|
||
"product_category": "men shoes"
|
||
},
|
||
"intent": "Show me products under $30 in \"men shoes\" category",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING__/clothing-shoes-jewelry/men/shoes.html?price=0-30",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 139,
|
||
"old_task_id": 270
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 66,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Search for \"{{keyword}}\"",
|
||
"instantiation_dict": {
|
||
"keyword": "switch accessories"
|
||
},
|
||
"intent": "Search for \"switch accessories\"",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING__/catalogsearch/result/?q=switch+accessories",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 212,
|
||
"old_task_id": 276
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 67,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Look up the most recent models of XBox controllers released between 2020-2021?",
|
||
"instantiation_dict": {},
|
||
"intent": "Look up the most recent models of XBox controllers released between 2020-2021?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING__/microsoft-xbox-controller-carbon-black-for-series-x-series-s-xbox-one-windows-10-android-ios-bundled-with-dual-port-charging-dock-xbox-controller-skin-voucher-premgear-cloth.html",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 210,
|
||
"old_task_id": 283
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 68,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Show the least expensive {{product}} with a minimum storage capacity of {{min_storage}}.",
|
||
"instantiation_dict": {
|
||
"product": "switch card holder",
|
||
"min_storage": "15 cards"
|
||
},
|
||
"intent": "Show the least expensive switch card holder with a minimum storage capacity of 15 cards.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING__/game-card-holder-storage-case-for-nintendo-switch-games-or-ps-vita-game-case-or-sd-memory-cards-black.html",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 207,
|
||
"old_task_id": 285
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 69,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "How much time does it take from Pittsburgh to Philadelphia by car?",
|
||
"instantiation_dict": {},
|
||
"intent": "How much time does it take from Pittsburgh to Philadelphia by car?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"8h 33min"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "5h 47min"
|
||
},
|
||
"intent_template_id": 47,
|
||
"old_task_id": 287
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 70,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history",
|
||
"instantiation_dict": {
|
||
"attribute": "name"
|
||
},
|
||
"intent": "Tell me the name of the customer who has the most cancellations in the history",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"Samantha Jones"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Samantha Jones"
|
||
},
|
||
"intent_template_id": 234,
|
||
"old_task_id": 288
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 71,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Show me the command to clone {{repo}} with SSH.",
|
||
"instantiation_dict": {
|
||
"repo": "the best GAN python implementation"
|
||
},
|
||
"intent": "Show me the command to clone the best GAN python implementation with SSH.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"exact_match": "ssh://git@localhost:2222/eriklindernoren/PyTorch-GAN.git"
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "ssh://git@localhost:2222/eriklindernoren/PyTorch-GAN.git"
|
||
},
|
||
"intent_template_id": 329,
|
||
"old_task_id": 296
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 72,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Show the most recent {{status}} order",
|
||
"instantiation_dict": {
|
||
"status": "pending"
|
||
},
|
||
"intent": "Show the most recent pending order",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING__/sales/order/view/order_id/189/",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 180,
|
||
"old_task_id": 300
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 73,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project",
|
||
"instantiation_dict": {
|
||
"repo": "Pytorch GAN"
|
||
},
|
||
"intent": "Tell me who has made the most contributions, in terms of number of commits, to the Pytorch GAN project",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"Erik Linder-Norén"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Erik Linder-Norén"
|
||
},
|
||
"intent_template_id": 323,
|
||
"old_task_id": 311
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 74,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Which number to call for the customer service?",
|
||
"instantiation_dict": {},
|
||
"intent": "Which number to call for the customer service?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"exact_match": "N/A"
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "There is no phone number in the website",
|
||
"reference_answer_raw_annotation": "N/A"
|
||
},
|
||
"intent_template_id": 134,
|
||
"old_task_id": 313
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 75,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?",
|
||
"instantiation_dict": {
|
||
"repo": "2019-nCov",
|
||
"attribute": "last names"
|
||
},
|
||
"intent": "List the last names of the top 3 contributors to 2019-nCov repo, ranked by the number of commits?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"Lo",
|
||
"Chen",
|
||
"Chu"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Lo, Chen, Chu"
|
||
},
|
||
"intent_template_id": 324,
|
||
"old_task_id": 318
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 76,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "How much refund I should expect from my order canlled in {{time}}, including shipping fee",
|
||
"instantiation_dict": {
|
||
"time": "2022"
|
||
},
|
||
"intent": "How much refund I should expect from my order canlled in 2022, including shipping fee",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"3053.97"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "3053.97"
|
||
},
|
||
"intent_template_id": 160,
|
||
"old_task_id": 321
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 77,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.",
|
||
"instantiation_dict": {
|
||
"product": "chairs",
|
||
"sorting_order": "ascending price"
|
||
},
|
||
"intent": "Show me the \"chairs\" listings by ascending price.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING__/catalogsearch/result/index/?product_list_order=price&q=chairs&product_list_dir=asc",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 208,
|
||
"old_task_id": 324
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 78,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "How much did I spend on shopping at One Stop Market {{time}}? They gave me a 20% discount on the total amount for orders exceeding $200 in cash",
|
||
"instantiation_dict": {
|
||
"time": "on November 2022"
|
||
},
|
||
"intent": "How much did I spend on shopping at One Stop Market on November 2022? They gave me a 20% discount on the total amount for orders exceeding $200 in cash",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"359.546"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "359.546"
|
||
},
|
||
"intent_template_id": 147,
|
||
"old_task_id": 333
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 79,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Tell me when I last ordered my {{description}}?",
|
||
"instantiation_dict": {
|
||
"description": "body butter"
|
||
},
|
||
"intent": "Tell me when I last ordered my body butter?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"January 16th 2023"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "January 16th 2023"
|
||
},
|
||
"intent_template_id": 169,
|
||
"old_task_id": 335
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 80,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "How many reviews our shop received {{time}}?",
|
||
"instantiation_dict": {
|
||
"time": "in May 2023"
|
||
},
|
||
"intent": "How many reviews our shop received in May 2023?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"0"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "0"
|
||
},
|
||
"intent_template_id": 248,
|
||
"old_task_id": 348
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 81,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Who else have access to my repo {{repo}}, show me their usernames",
|
||
"instantiation_dict": {
|
||
"repo": "gimmiethat.space"
|
||
},
|
||
"intent": "Who else have access to my repo gimmiethat.space, show me their usernames",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"exact_match": "yjlou"
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "yjlou"
|
||
},
|
||
"intent_template_id": 298,
|
||
"old_task_id": 349
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 82,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "List products from {{product_category}} category by {{order}} price",
|
||
"instantiation_dict": {
|
||
"product_category": "living room furtniture",
|
||
"order": "descending"
|
||
},
|
||
"intent": "List products from living room furtniture category by descending price",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING__/home-kitchen/furniture/living-room-furniture.html?product_list_order=price&product_list_dir=desc",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 137,
|
||
"old_task_id": 354
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 83,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Checkout merge requests requiring my review",
|
||
"instantiation_dict": {},
|
||
"intent": "Checkout merge requests requiring my review",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__GITLAB__/dashboard/merge_requests?reviewer_username=byteblaze",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 291,
|
||
"old_task_id": 357
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 84,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Show me the {{info}} for order number {{order_number}}.",
|
||
"instantiation_dict": {
|
||
"info": "order statuses",
|
||
"order_number": "170 and 189"
|
||
},
|
||
"intent": "Show me the order statuses for order number 170 and 189.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"170: cancelled",
|
||
"189: pending"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "170: cancelled, 189: pending"
|
||
},
|
||
"intent_template_id": 206,
|
||
"old_task_id": 361
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 85,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking",
|
||
"instantiation_dict": {
|
||
"location/address_1": "Carnegie Mellon University",
|
||
"location/address_2": "CVS (closet one)"
|
||
},
|
||
"intent": "Measure distance between Carnegie Mellon University and CVS (closet one) by walking",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"1.4km"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "1.4km"
|
||
},
|
||
"intent_template_id": 58,
|
||
"old_task_id": 367
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 86,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "find discounted items.",
|
||
"instantiation_dict": {},
|
||
"intent": "find discounted items.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"exact_match": "N/A"
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "There is no function to show only discount items",
|
||
"reference_answer_raw_annotation": "There is no function to show only discount items."
|
||
},
|
||
"intent_template_id": 188,
|
||
"old_task_id": 368
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 87,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Pull up the description page of {{location}} on Map",
|
||
"instantiation_dict": {
|
||
"location": "Carnegie Music Hall"
|
||
},
|
||
"intent": "Pull up the description page of Carnegie Music Hall on Map",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": null,
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[id=\"sidebar_content\"').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Carnegie Music Hall"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 52,
|
||
"old_task_id": 369
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 88,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Preview the {{name}} theme for my shop",
|
||
"instantiation_dict": {
|
||
"name": "Magento Blank"
|
||
},
|
||
"intent": "Preview the Magento Blank theme for my shop",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING_ADMIN__/admin/system_design_theme/edit/id/1",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 266,
|
||
"old_task_id": 374
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 89,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Summarize customer reviews for {{product}}.",
|
||
"instantiation_dict": {
|
||
"product": "Amazon Echo Dot 3rd generation"
|
||
},
|
||
"intent": "Summarize customer reviews for Amazon Echo Dot 3rd generation.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"Serena: It was the exact same one that came with my echo show that my puppy",
|
||
"Amazon Customer: It doesn’t work .",
|
||
"BlessedGirl: Great replacement for Echo Dot.",
|
||
"Jeff Sederquist: Great product no issues. It's for a Echo show 5",
|
||
"melvin m.: Very good charger but I had to buy this one because when I bought an echo show 5, I came with an echo show 10 charger in the package with the device",
|
||
"Jack Smith: Looks like pictures",
|
||
"Ignacio: Muy bueno de buena calidad",
|
||
"BusyB: My kids bent the original cord for their Echo show 5 and this replacement cord is just like the one it came with. It works just fine. Been using it for 2 weeks now and no problems.",
|
||
"Kee Williams: Great value works better than original",
|
||
"Howard A. Sinclair: Great product!!! Works exactly as it is supposed to."
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": []
|
||
},
|
||
"intent_template_id": 182,
|
||
"old_task_id": 376
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 90,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Find the {{space}} around {{location}}",
|
||
"instantiation_dict": {
|
||
"location": "Carnegie Music Hall",
|
||
"space": "hotel"
|
||
},
|
||
"intent": "Find the hotel around Carnegie Music Hall",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__MAP__/search?query=hotels%20near%20Carnegie%20Music%20Hall",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 59,
|
||
"old_task_id": 381
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 91,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "I am arriving at Carnegie Mellon University. Find the nearby US Citizenship and Immigration Services and the walking distance to the nearest Social Security Administration from US Citizenship and Immigration Services",
|
||
"instantiation_dict": {},
|
||
"intent": "I am arriving at Carnegie Mellon University. Find the nearby US Citizenship and Immigration Services and the walking distance to the nearest Social Security Administration from US Citizenship and Immigration Services",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"fuzzy_match": [
|
||
"N/A"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "There is no USCIS nearby",
|
||
"reference_answer_raw_annotation": "N/A"
|
||
},
|
||
"intent_template_id": 781,
|
||
"old_task_id": 382
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 92,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "I am arriving at Pittsburgh Airport. Show me the name of a Hyatt hotel if there is any nearby. Tell me the names of supermarkets that are within 15mins driving from the hotel",
|
||
"instantiation_dict": {},
|
||
"intent": "I am arriving at Pittsburgh Airport. Show me the name of a Hyatt hotel if there is any nearby. Tell me the names of supermarkets that are within 15mins driving from the hotel",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"Hyatt Regency Pittsburgh International Airport",
|
||
"Giant Eagle",
|
||
"ALDI"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Hyatt Regency Pittsburgh International Airport Giant Eagle, ALDI"
|
||
},
|
||
"intent_template_id": 782,
|
||
"old_task_id": 383
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 93,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "List the customer names who complain about the quality of EYZUTAK phone cases",
|
||
"instantiation_dict": {},
|
||
"intent": "List the customer names who complain about the quality of EYZUTAK phone cases",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"Lisa Lee",
|
||
"Evelyn Kurver",
|
||
"Amanda",
|
||
"N Randall"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Lisa Lee, Evelyn Kurver, Amanda, N Randall"
|
||
},
|
||
"intent_template_id": 666,
|
||
"old_task_id": 384
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 94,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "What is the rating of {{product}}",
|
||
"instantiation_dict": {
|
||
"product": "Ugreen lightning to 3.5mm cable"
|
||
},
|
||
"intent": "What is the rating of Ugreen lightning to 3.5mm cable. Please round to the nearest whole number",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"65 |OR| 3"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "Around 3.1 stars"
|
||
},
|
||
"intent_template_id": 1355,
|
||
"old_task_id": 386
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 95,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Who gave {{stars}} for phone cases from EYZUTAK",
|
||
"instantiation_dict": {
|
||
"stars": "4 or 5 stars"
|
||
},
|
||
"intent": "Who gave 4 or 5 stars for phone cases from EYZUTAK",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"MH",
|
||
"Misba009",
|
||
"Amanda",
|
||
"Amazon Customer",
|
||
"Cally",
|
||
"Bethany Robertson"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "",
|
||
"reference_answer_raw_annotation": "MH, Misba009, Amanda, Amazon Customer, Cally, Bethany Robertson, Bethany Robertson"
|
||
},
|
||
"intent_template_id": 1356,
|
||
"old_task_id": 387
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 96,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project",
|
||
"instantiation_dict": {
|
||
"mr": "color ulitity",
|
||
"repo": "a11yproject.com",
|
||
"content": "Good idea"
|
||
},
|
||
"intent": "Post \"Good idea\" for the merge request related to color ulitity in a11yproject.com project",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests/1071",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText",
|
||
"required_contents": {
|
||
"exact_match": "Good idea"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 348,
|
||
"old_task_id": 392
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 97,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Change my reddit bio to \"{{content}}\"",
|
||
"instantiation_dict": {
|
||
"content": "Seeking SDE positions"
|
||
},
|
||
"intent": "Change my reddit bio to \"Seeking SDE positions\"",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__REDDIT__/user/MarvelsGrantMan136",
|
||
"locator": "document.querySelector(\".user-bio__biography\").outerText",
|
||
"required_contents": {
|
||
"exact_match": "Seeking SDE positions"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 6,
|
||
"old_task_id": 401
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 98,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Upvote the newest post in {{subreddit}} subreddit",
|
||
"instantiation_dict": {
|
||
"subreddit": "books"
|
||
},
|
||
"intent": "Upvote the newest post in books subreddit",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__REDDIT__/f/books/124260/adults-reading-to-each-other-out-loud",
|
||
"locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"vote vote--user-upvoted"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 22,
|
||
"old_task_id": 404
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 99,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Go to the merge request on {{topic}} I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.",
|
||
"instantiation_dict": {
|
||
"topic": "verification functions"
|
||
},
|
||
"intent": "Go to the merge request on verification functions I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__GITLAB__/byteblaze/a11y-webring.club/-/merge_requests/40",
|
||
"locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText",
|
||
"required_contents": {
|
||
"exact_match": "@davepgreene"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 360,
|
||
"old_task_id": 415
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 100,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Set my gitlab status as {{status}}.",
|
||
"instantiation_dict": {
|
||
"status": "Enjoying life"
|
||
},
|
||
"intent": "Set my gitlab status as Enjoying life.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__GITLAB__/byteblaze",
|
||
"locator": "document.querySelector('.cover-status').lastChild.textContent",
|
||
"required_contents": {
|
||
"exact_match": "Enjoying life"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 361,
|
||
"old_task_id": 419
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 101,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Mark all {{brand}} shirts on sale",
|
||
"instantiation_dict": {
|
||
"brand": "Hollister"
|
||
},
|
||
"intent": "Mark all Hollister shirts on sale",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING_ADMIN__/catalog/product/edit/id/126/",
|
||
"locator": "document.querySelector('input[name=\"product[sale]\"]').value",
|
||
"required_contents": {
|
||
"exact_match": "1"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 237,
|
||
"old_task_id": 423
|
||
},
|
||
{
|
||
"sites": [
|
||
"wikipedia",
|
||
"map"
|
||
],
|
||
"task_id": 102,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Find the page of {{description}} on the map.",
|
||
"instantiation_dict": {
|
||
"description": "the place in Pennsylvania where a plane crashed during the September 11th attacks"
|
||
},
|
||
"intent": "Find the page of the place in Pennsylvania where a plane crashed during the September 11th attacks on the map.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[id=\"sidebar_content\"').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Somerset County"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 371,
|
||
"old_task_id": 426
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 103,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__/tall-pink-taper-candles-4-piece-orange-colored-tapered-candles-gradient-candles-10-6-inches-tall-tie-dye-candle-set-large-dripless-long-burning-candlesticks-two-color-taper-candles-candlesticks.html |AND| __SHOPPING__/spaas-white-taper-candles-4-pack-10-inch-tall-candles-scent-free-premium-wax-candle-sticks-8-hour-long-burning-white-candlesticks-for-home-decoration-wedding-holiday-and-parties.html |AND| __SHOPPING__/white-starfish-wall-candle-sconces-set-of-2-beach-decor-ocean-themed-wall-mount-candleholders-nautical-style-beach-bathroom-decor-coastal-farmhouse-seashell-candle-holders.html",
|
||
"geolocation": null,
|
||
"intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart",
|
||
"instantiation_dict": {},
|
||
"intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING__/checkout/cart",
|
||
"locator": "",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"SPAAS White Taper Candles - 4 Pack |OR| 10 Inch Tall Candles, Scent-Free Premium Wax Candle Sticks |OR| 8 Hour Long Burning White Candlesticks for Home Decoration, Wedding, Holiday and Parties"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 145,
|
||
"old_task_id": 431
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 104,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?",
|
||
"instantiation_dict": {
|
||
"product": "a make up removal kit",
|
||
"time": "during summer 2022"
|
||
},
|
||
"intent": "I previously ordered some a make up removal kit during summer 2022 and later cancelled. Can you reorder it for me?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "func:shopping_get_latest_order_url()",
|
||
"locator": "document.querySelector(\".order-details-items.ordered\").outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"B0738JQG6Q"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 156,
|
||
"old_task_id": 440
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 105,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "set the homepage URL on my GitLab profile to {{url}}",
|
||
"instantiation_dict": {
|
||
"url": "https://egg.tart.com"
|
||
},
|
||
"intent": "set the homepage URL on my GitLab profile to https://egg.tart.com",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__GITLAB__/byteblaze",
|
||
"locator": "document.querySelector('.profile-header [itemprop=\"url\"]').outerText",
|
||
"required_contents": {
|
||
"exact_match": "egg.tart.com"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 331,
|
||
"old_task_id": 448
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 106,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Disable {{product}} from the site, they are facing some quality issues.",
|
||
"instantiation_dict": {
|
||
"product": "Ryker Tee Crew Neck"
|
||
},
|
||
"intent": "Disable Ryker Tee Crew Neck from the site, they are facing some quality issues.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING_ADMIN__/catalog/product/edit/id/478/",
|
||
"locator": "document.querySelector('[name=\"product[status]\"]').value",
|
||
"required_contents": {
|
||
"exact_match": "2"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 242,
|
||
"old_task_id": 454
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 107,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1481/",
|
||
"geolocation": null,
|
||
"intent_template": "{{action}} the price of this product by {{amount}}",
|
||
"instantiation_dict": {
|
||
"amount": "$5",
|
||
"action": "Reduce"
|
||
},
|
||
"intent": "Reduce the price of this product by $5",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1481/",
|
||
"locator": "document.querySelector('[name=\"product[price]\"]').value",
|
||
"required_contents": {
|
||
"exact_match": "27.00"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 247,
|
||
"old_task_id": 458
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 108,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Update the product description of {{product}} to highlight the real user positive reviews by quoting the comments",
|
||
"instantiation_dict": {
|
||
"product": "Antonia Racer Tank"
|
||
},
|
||
"intent": "Update the product description of Antonia Racer Tank to highlight the real user positive reviews by quoting the comments",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING_ADMIN__/../antonia-racer-tank.html",
|
||
"locator": "document.querySelector('.data.item.content').outerText + (document.querySelector('.product.attribute.overview [itemprop=\"description\"]')?.outerText || '')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"This is in regular rotation at the gym",
|
||
"Its colorful and looks kinda cute under my exercise tanks",
|
||
"it's very stylish for yoga or something else low impact"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 251,
|
||
"old_task_id": 464
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 109,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Add {{product}} to my wish list",
|
||
"instantiation_dict": {
|
||
"product": "2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts"
|
||
},
|
||
"intent": "Add 2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts to my wish list",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING__/wishlist/",
|
||
"locator": "document.querySelector('.products-grid.wishlist').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 186,
|
||
"old_task_id": 466
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 110,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Cancel order {{id}}",
|
||
"instantiation_dict": {
|
||
"id": "302"
|
||
},
|
||
"intent": "Cancel order 302",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING_ADMIN__/sales/order/view/order_id/302/",
|
||
"locator": "document.querySelector(\"#order_status\").outerText",
|
||
"required_contents": {
|
||
"exact_match": "Canceled"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 257,
|
||
"old_task_id": 470
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 111,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Set up a new, empty repository with the name {{project_name}}?",
|
||
"instantiation_dict": {
|
||
"project_name": "awesome_llm_reading"
|
||
},
|
||
"intent": "Set up a new, empty repository with the name awesome_llm_reading?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__GITLAB__/byteblaze/awesome_llm_reading",
|
||
"locator": "",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"awesome_llm_reading"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 292,
|
||
"old_task_id": 476
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 112,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.",
|
||
"instantiation_dict": {
|
||
"name": "Vinta"
|
||
},
|
||
"intent": "Vinta wants to check my dotfile configurations. Please invite him to the repo as a guest.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__GITLAB__/byteblaze/dotfiles/-/project_members",
|
||
"locator": "func:gitlab_get_project_memeber_role(__page__, 'vinta')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Guest"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 294,
|
||
"old_task_id": 485
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 113,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".",
|
||
"instantiation_dict": {
|
||
"old-heading": "Home Page",
|
||
"heading": "This is the home page!! Leave here!!"
|
||
},
|
||
"intent": "Change the page title of \"Home Page\" page on my site to \"This is the home page!! Leave here!!\".",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING_ADMIN__/cms/page/edit/page_id/2/",
|
||
"locator": "document.querySelector('input[name=\"title\"]').value",
|
||
"required_contents": {
|
||
"exact_match": "This is the home page!! Leave here!!"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 275,
|
||
"old_task_id": 488
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 114,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"",
|
||
"instantiation_dict": {
|
||
"name": "Sarah Miller",
|
||
"message": "the order is ready to be shipped soon!"
|
||
},
|
||
"intent": "Notify Sarah Miller in their most recent pending order with message \"the order is ready to be shipped soon!\"",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"exact_match": "N/A"
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "System message: We cannot add order history."
|
||
},
|
||
"intent_template_id": 280,
|
||
"old_task_id": 491
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 115,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}",
|
||
"instantiation_dict": {
|
||
"tracking": "24353446464",
|
||
"order": "307",
|
||
"service": "DHL"
|
||
},
|
||
"intent": "Update order #307 with the DHL tracking number 24353446464",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING_ADMIN__/sales/order/commentsHistory/order_id/307/active_tab/order_shipments/",
|
||
"locator": "",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Tracking number 24353446464 for DHL assigned"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 284,
|
||
"old_task_id": 497
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 116,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Make all {{product}} as out of stock",
|
||
"instantiation_dict": {
|
||
"product": "Aeno capri"
|
||
},
|
||
"intent": "Make all Aeno capri as out of stock",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1861/",
|
||
"locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"]').value",
|
||
"required_contents": {
|
||
"exact_match": "0"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 287,
|
||
"old_task_id": 505
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 117,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.",
|
||
"instantiation_dict": {
|
||
"product_category": "meat substitute",
|
||
"dollar_value": "between 100 and 200"
|
||
},
|
||
"intent": "Buy the highest rated product from the meat substitute category within a budget between 100 and 200.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "func:shopping_get_latest_order_url()",
|
||
"locator": "document.querySelector(\".order-details-items.ordered\").outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"B01CTR3DLE"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 172,
|
||
"old_task_id": 506
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 118,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Buy the best rating product from \"{{category}}\" category with at least 5 reviews and the product is least expensive",
|
||
"instantiation_dict": {
|
||
"category": "Men's shoe"
|
||
},
|
||
"intent": "Buy the best rating product from \"Men's shoe\" category with at least 5 reviews and the product is least expensive",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "func:shopping_get_latest_order_url()",
|
||
"locator": "document.querySelector(\".order-details-items.ordered\").outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"B01J4MM3KO"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 216,
|
||
"old_task_id": 509
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 119,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Add a {{product}} to my wish list.",
|
||
"instantiation_dict": {
|
||
"product": "white desk"
|
||
},
|
||
"intent": "Add a white desk to my wish list.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING__/wishlist/",
|
||
"locator": "document.querySelector('.products-grid.wishlist').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"white",
|
||
"desk"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 189,
|
||
"old_task_id": 514
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 120,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__/elmwood-inn-fine-teas-orange-vanilla-caffeine-free-fruit-infusion-16-ounce-pouch.html",
|
||
"geolocation": null,
|
||
"intent_template": "Add this product to my wishlist",
|
||
"instantiation_dict": {},
|
||
"intent": "Add this product to my wishlist",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING__/wishlist/",
|
||
"locator": "document.querySelector('.products-grid.wishlist').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Elmwood Inn Fine Teas, Orange Vanilla Caffeine-free Fruit Infusion, 16-Ounce Pouch"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 196,
|
||
"old_task_id": 516
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 121,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Subscribe to the newsletter of OneStopMarket",
|
||
"instantiation_dict": {},
|
||
"intent": "Subscribe to the newsletter of OneStopMarket",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING__/newsletter/manage/",
|
||
"locator": "document.querySelector('[title=\"General Subscription\"]').checked.toString()",
|
||
"required_contents": {
|
||
"exact_match": "true"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 199,
|
||
"old_task_id": 521
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 122,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Star the top {{number}} most starred repos in Gitlab",
|
||
"instantiation_dict": {
|
||
"number": "eight"
|
||
},
|
||
"intent": "Star the top eight most starred repos in Gitlab",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__GITLAB__/users/byteblaze/starred",
|
||
"locator": "",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"AndroidSlidingUpPanel",
|
||
"create-react-app",
|
||
"ffmpeg-python",
|
||
"PHP_XLSXWriter",
|
||
"AndroidAsync",
|
||
"Pytorch-GAN",
|
||
"administrate",
|
||
"keycloak"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 354,
|
||
"old_task_id": 524
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 123,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet",
|
||
"instantiation_dict": {
|
||
"product": "phone screen protector",
|
||
"time": "March 2023"
|
||
},
|
||
"intent": "Draft a refund message via their \"contact us\" form for the phone screen protector I bought March 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[title=\"What’s on your mind?\"]').value",
|
||
"required_contents": {
|
||
"fuzzy_match": [
|
||
"refund",
|
||
"it broke after three days of use",
|
||
"000000180",
|
||
"12.99"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 154,
|
||
"old_task_id": 528
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 124,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Follow {{account_list}} on Gitlab",
|
||
"instantiation_dict": {
|
||
"account_list": [
|
||
"Jakub Klinkovský",
|
||
"Koushik",
|
||
"Vinta Chen"
|
||
]
|
||
},
|
||
"intent": "Follow ['Jakub Klinkovský', 'Koushik', 'Vinta Chen'] on Gitlab",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__GITLAB__/users/byteblaze/following",
|
||
"locator": "document.querySelector('.user-profile').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"@lahwaacz",
|
||
"@koush",
|
||
"@vinta"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 330,
|
||
"old_task_id": 534
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 125,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Modify the address of order #{{order_id}} to {{address}}",
|
||
"instantiation_dict": {
|
||
"order_id": "299",
|
||
"address": "456 Oak Avenue, New York, NY, 10001"
|
||
},
|
||
"intent": "Modify the address of order #299 to 456 Oak Avenue, New York, NY, 10001",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING_ADMIN__/sales/order/view/order_id/299",
|
||
"locator": "",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"456 Oak Avenue",
|
||
"New York",
|
||
"10001"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 240,
|
||
"old_task_id": 538
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 126,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Add a new {{option}} {{value}} to {{base_setting}} of {{product}}",
|
||
"instantiation_dict": {
|
||
"option": "color",
|
||
"value": "blue",
|
||
"base_setting": "size S and M",
|
||
"product": "Frankie Sweatshirt"
|
||
},
|
||
"intent": "Add a new color blue to size S and M of Frankie Sweatshirt",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING_ADMIN__/catalog/product/edit/id/110/",
|
||
"locator": "document.querySelector('[data-index=\"configurable\"]').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Sweatshirt-M-Blue",
|
||
"Sweatshirt-S-Blue"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 252,
|
||
"old_task_id": 548
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab",
|
||
"reddit"
|
||
],
|
||
"task_id": 127,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab.reddit_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?",
|
||
"instantiation_dict": {
|
||
"name": "Do it myself",
|
||
"num": 8
|
||
},
|
||
"intent": "create a repository named Do it myself that includes a README file with the links to the most active 8 DIY ideas on DIY subreddit?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__GITLAB__/byteblaze/Do-it-myself/-/raw/main/README.md",
|
||
"locator": "",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"__REDDIT__/f/DIY/118903/separate-glued-plastic-parts",
|
||
"__REDDIT__/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess",
|
||
"__REDDIT__/f/DIY/118935/basement-bulkhead-soffit-wall-framing",
|
||
"__REDDIT__/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit",
|
||
"__REDDIT__/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches",
|
||
"__REDDIT__/f/DIY/118931/afci-outlet-question",
|
||
"__REDDIT__/f/DIY/118824/teflon-tape-to-attach-washing-machine-drain-hose-to-pipe",
|
||
"__REDDIT__/f/DIY/118866/paver-base-for-shed"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 88,
|
||
"old_task_id": 566
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 128,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo",
|
||
"instantiation_dict": {
|
||
"collaborator_account_list": "Jakub Klinkovský and Benoît Blanchon",
|
||
"repo": "gimmiethat.space"
|
||
},
|
||
"intent": "Invite Jakub Klinkovský and Benoît Blanchon as collaborator to gimmiethat.space repo",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__GITLAB__/byteblaze/gimmiethat.space/-/project_members",
|
||
"locator": "",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"@lahwaacz",
|
||
"@bblanchon"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 293,
|
||
"old_task_id": 567
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 129,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly",
|
||
"instantiation_dict": {
|
||
"address": "111 Magnolia Path, Atlanta, GA, 30303"
|
||
},
|
||
"intent": "I recently moved, my address is 111 Magnolia Path, Atlanta, GA, 30303, update my information on OneStopShopping accordingly",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING__/customer/address",
|
||
"locator": "document.querySelector(\".box.box-address-shipping > .box-content\").outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"111 Magnolia Path",
|
||
"Atlanta, Georgia, 30303"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "__SHOPPING__/customer/address",
|
||
"locator": "document.querySelector(\".box.box-address-billing > .box-content\").outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"111 Magnolia Path",
|
||
"Atlanta, Georgia, 30303"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 165,
|
||
"old_task_id": 574
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 130,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Add the following users to my {{repo}} as {{role}}: {{user_list}}",
|
||
"instantiation_dict": {
|
||
"repo": "GitHub timeline item management extension",
|
||
"role": "maintainer",
|
||
"user_list": [
|
||
"abisubramanya27",
|
||
"lahwaacz"
|
||
]
|
||
},
|
||
"intent": "Add the following users to my GitHub timeline item management extension as maintainer: ['abisubramanya27', 'lahwaacz']",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__GITLAB__/byteblaze/remove-board-movement-events-from-the-github-issue-timeline/-/project_members",
|
||
"locator": "func:gitlab_get_project_memeber_role(__page__, 'abisubramanya27')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Maintainer"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "__GITLAB__/byteblaze/remove-board-movement-events-from-the-github-issue-timeline/-/project_members",
|
||
"locator": "func:gitlab_get_project_memeber_role(__page__, 'lahwaacz')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Maintainer"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 351,
|
||
"old_task_id": 577
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 131,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?",
|
||
"instantiation_dict": {
|
||
"name": "Cyberpunk",
|
||
"description": "Welcome to the future",
|
||
"sidebar_list": [
|
||
"Games",
|
||
"Books",
|
||
"Movies",
|
||
"Future"
|
||
]
|
||
},
|
||
"intent": "Create a new forum named Cyberpunk, with a description of Welcome to the future, and include ['Games', 'Books', 'Movies', 'Future'] in the sidebar?",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__REDDIT__/f/Cyberpunk/edit",
|
||
"locator": "document.querySelector(\"#forum_description\").value",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Welcome to the future"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "__REDDIT__/f/Cyberpunk/edit",
|
||
"locator": "document.querySelector(\"#forum_sidebar\").value",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Games",
|
||
"Books",
|
||
"Movies",
|
||
"Future"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 7,
|
||
"old_task_id": 582
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 132,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.",
|
||
"instantiation_dict": {
|
||
"subreddit": "machine learning"
|
||
},
|
||
"intent": "Open the thread of a trending post on the forum \"machine learning\" and subscribe.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__REDDIT__/f/MachineLearning",
|
||
"program_html": [
|
||
{
|
||
"url": "__REDDIT__/",
|
||
"locator": "document.querySelector(\"#sidebar > section\").outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"MachineLearning"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 4,
|
||
"old_task_id": 599
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 133,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer",
|
||
"instantiation_dict": {
|
||
"question": "is car necessary in NYC"
|
||
},
|
||
"intent": "Post my question, \"is car necessary in NYC\", in a subreddit where I'm likely to get an answer",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__REDDIT__/f/nyc",
|
||
"program_html": [
|
||
{
|
||
"url": "func:reddit_get_post_url('__last_url__')",
|
||
"locator": "document.querySelector('.submission__inner').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"is car necessary in NYC"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 3765,
|
||
"old_task_id": 601
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 134,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there",
|
||
"instantiation_dict": {
|
||
"topic": "gaming consoles",
|
||
"question": "what is the recommended console to buy these days"
|
||
},
|
||
"intent": "Find a subreddit focused on topics related to gaming consoles, and post my question, \"what is the recommended console to buy these days\" there",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__REDDIT__/f/consoles",
|
||
"program_html": [
|
||
{
|
||
"url": "func:reddit_get_post_url('__last_url__')",
|
||
"locator": "document.querySelector('.submission__inner').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"what is the recommended console to buy these days"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 5,
|
||
"old_task_id": 605
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 135,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".",
|
||
"instantiation_dict": {
|
||
"book": "big little lies",
|
||
"content": "can't stop it"
|
||
},
|
||
"intent": "Post a review of my recent reading \"big little lies\" in the r/books with my comment \"can't stop it\".",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__REDDIT__/f/books",
|
||
"program_html": [
|
||
{
|
||
"url": "func:reddit_get_post_url('__last_url__')",
|
||
"locator": "document.querySelector('.submission__inner').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"big little lies",
|
||
"can't stop it"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 9,
|
||
"old_task_id": 612
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 136,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__/f/pics",
|
||
"geolocation": null,
|
||
"intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"",
|
||
"instantiation_dict": {
|
||
"content": "costume contest",
|
||
"subreddit": "funny"
|
||
},
|
||
"intent": "Re-post the image of costume contest in this page to funny subreddit and note \"from /f/pics\"",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__REDDIT__/f/funny",
|
||
"program_html": [
|
||
{
|
||
"url": "func:reddit_get_post_url('__last_url__')",
|
||
"locator": "document.querySelector('.submission__inner').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"from /f/pics"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "func:reddit_get_post_url('__last_url__')",
|
||
"locator": "[...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"bd8bc5f4c846aac4df08626faa3a34a7d47c8f3bdd92bf615a54afd939f063a7.jpg"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 11,
|
||
"old_task_id": 619
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 137,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"",
|
||
"instantiation_dict": {
|
||
"topic": "Iphone 14"
|
||
},
|
||
"intent": "Create a discussion post about \"Iphone 14\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__REDDIT__/f/iphone",
|
||
"program_html": [
|
||
{
|
||
"url": "func:reddit_get_post_url('__last_url__')",
|
||
"locator": "document.querySelector('.submission__inner').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"your opinion",
|
||
"Iphone 14"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 13,
|
||
"old_task_id": 626
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 138,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}",
|
||
"instantiation_dict": {
|
||
"category": "running shoes",
|
||
"price": "$100",
|
||
"subreddit": "r/sports"
|
||
},
|
||
"intent": "Ask for product recommendations for running shoes within a budget of $100 in r/sports",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__REDDIT__/f/sports",
|
||
"program_html": [
|
||
{
|
||
"url": "func:reddit_get_post_url('__last_url__')",
|
||
"locator": "document.querySelector('.submission__inner').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"100",
|
||
"running",
|
||
"shoes"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 15,
|
||
"old_task_id": 631
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 139,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit",
|
||
"instantiation_dict": {
|
||
"interest": "Harry Potter",
|
||
"date": "July 8th",
|
||
"subreddit": "books"
|
||
},
|
||
"intent": "Post a notice on a virtual meetup for Harry Potter enthusiasts on July 8th in the books subreddit",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__REDDIT__/f/books",
|
||
"program_html": [
|
||
{
|
||
"url": "func:reddit_get_post_url('__last_url__')",
|
||
"locator": "document.querySelector('.submission__inner').outerText",
|
||
"required_contents": {
|
||
"fuzzy_match": [
|
||
"Harry Potter",
|
||
"July 8th",
|
||
"virtual meetup"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 16,
|
||
"old_task_id": 641
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 140,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Post in {{subreddit}} subreddit about what could machine learning help the corresponding field.",
|
||
"instantiation_dict": {
|
||
"subreddit": "books"
|
||
},
|
||
"intent": "Post in books subreddit about what could machine learning help the corresponding field.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__REDDIT__/f/books",
|
||
"program_html": [
|
||
{
|
||
"url": "func:reddit_get_post_url('__last_url__')",
|
||
"locator": "document.querySelector('.submission__inner').outerText",
|
||
"required_contents": {
|
||
"fuzzy_match": [
|
||
"machine learning",
|
||
"help"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 19,
|
||
"old_task_id": 645
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 141,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__/f/technology/134852/ai-experts-disown-musk-backed-campaign-citing-their-research",
|
||
"geolocation": null,
|
||
"intent_template": "Reply to {{position_description}} with my comment \"{{content_description}}\"",
|
||
"instantiation_dict": {
|
||
"position_description": "the post",
|
||
"content_description": "???"
|
||
},
|
||
"intent": "Reply to the post with my comment \"???\"",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__REDDIT__/f/technology/134852/ai-experts-disown-musk-backed-campaign-citing-their-research",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('.comment__body').outerText",
|
||
"required_contents": {
|
||
"exact_match": "???"
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 23,
|
||
"old_task_id": 652
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 142,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.",
|
||
"instantiation_dict": {
|
||
"product": "speaker",
|
||
"order_id": "148"
|
||
},
|
||
"intent": "Fill the \"contact us\" form in the site for a refund on the speaker I bought, stating that it broke after just three days of use. Also, ensure to include the order number #148 and the product SKU. Don't submit yet, I will check.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING__/contact",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[title=\"What’s on your mind?\"]').value",
|
||
"required_contents": {
|
||
"fuzzy_match": [
|
||
"refund",
|
||
"broke after three days of use",
|
||
"148",
|
||
"B003FVW3VA"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 153,
|
||
"old_task_id": 657
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 143,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Submit a merge request for {{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer",
|
||
"instantiation_dict": {
|
||
"source_branch": "a11yproject.com/redesign",
|
||
"target_branch": "master",
|
||
"reviewer": "Roshan Jossy"
|
||
},
|
||
"intent": "Submit a merge request for a11yproject.com/redesign branch to be merged into master branch, assign Roshan Jossy as the reviewer",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText",
|
||
"required_contents": {
|
||
"exact_match": "redesign"
|
||
}
|
||
},
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText",
|
||
"required_contents": {
|
||
"exact_match": "main"
|
||
}
|
||
},
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('.block.reviewer').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Roshan Jossy"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 335,
|
||
"old_task_id": 668
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping",
|
||
"reddit"
|
||
],
|
||
"task_id": 144,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"",
|
||
"instantiation_dict": {
|
||
"product": "Racing Wheel Overdrive for Xbox X",
|
||
"rating": "1 star"
|
||
},
|
||
"intent": "Gather the titles of Racing Wheel Overdrive for Xbox X reviews with 1 star rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Racing Wheel Overdrive for Xbox X\"",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__REDDIT__/f/gaming",
|
||
"program_html": [
|
||
{
|
||
"url": "func:reddit_get_post_url('__last_url__')",
|
||
"locator": "document.querySelector('.submission__title').outerText",
|
||
"required_contents": {
|
||
"exact_match": "real user feedback on Racing Wheel Overdrive for Xbox X"
|
||
}
|
||
},
|
||
{
|
||
"url": "func:reddit_get_post_url('__last_url__')",
|
||
"locator": "document.querySelector('.submission__body').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Unable to set neutral steering",
|
||
"Doesn’t work with PC",
|
||
"Crazy problems in automatic mode",
|
||
"pedals stopped working",
|
||
"Only works with certain games"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 101,
|
||
"old_task_id": 673
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 145,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Lookup orders that are {{status}}",
|
||
"instantiation_dict": {
|
||
"status": "canceled"
|
||
},
|
||
"intent": "Lookup orders that are canceled",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING_ADMIN__/sales/order/",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Canceled"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 253,
|
||
"old_task_id": 678
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit",
|
||
"gitlab"
|
||
],
|
||
"task_id": 146,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab.reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Find a GitLab repository related to {{topic}} and make a Reddit post linking to it in a relevant subreddit",
|
||
"instantiation_dict": {
|
||
"topic": "chatGPT"
|
||
},
|
||
"intent": "Find a GitLab repository related to chatGPT and make a Reddit post linking to it in a relevant subreddit",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__REDDIT__/f/machinelearning |OR| __REDDIT__/f/singularity |OR| __REDDIT__/f/deeplearning |OR| __REDDIT__/f/technology",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('.submission__inner').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"chatgpt",
|
||
"__GITLAB__/convexegg/chatgpt"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 116,
|
||
"old_task_id": 682
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit",
|
||
"gitlab"
|
||
],
|
||
"task_id": 147,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab.reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.",
|
||
"instantiation_dict": {
|
||
"repo": "auth0/angular-storage",
|
||
"subreddit": "technology"
|
||
},
|
||
"intent": "Promote auth0/angular-storage to subreddit technology with the description from the repo itself.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__REDDIT__/f/technology",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('.submission__inner').outerText + [...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"A storage library for AngularJS done right",
|
||
"__GITLAB__/auth0/angular-storage"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 117,
|
||
"old_task_id": 686
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 148,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}",
|
||
"instantiation_dict": {
|
||
"reason": "my refund is supposed to be replaced by a coupon"
|
||
},
|
||
"intent": "Draft an email to the shop owner via their contact us function for a coupon as my refund is supposed to be replaced by a coupon",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING__/contact/index/",
|
||
"program_html": [],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 163,
|
||
"old_task_id": 693
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 149,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}",
|
||
"instantiation_dict": {
|
||
"report": "sales order report",
|
||
"time_span": "for last month"
|
||
},
|
||
"intent": "Today is 3/15/2023, generate a sales order report for last month",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING_ADMIN__/reports/report_sales/sales",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[id=\"sales_report_from\"]').value",
|
||
"required_contents": {
|
||
"exact_match": "2/1/23"
|
||
}
|
||
},
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[id=\"sales_report_to\"]').value",
|
||
"required_contents": {
|
||
"exact_match": "2/28/23"
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 268,
|
||
"old_task_id": 704
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 150,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}",
|
||
"instantiation_dict": {
|
||
"type": "shipping",
|
||
"start_date": "08/05/2022",
|
||
"end_date": "03/01/2023"
|
||
},
|
||
"intent": "Create a shipping report from 08/05/2022 to 03/01/2023",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__SHOPPING_ADMIN__/reports/report_sales/shipping",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[id=\"sales_report_from\"]').value",
|
||
"required_contents": {
|
||
"exact_match": "8/5/22"
|
||
}
|
||
},
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[id=\"sales_report_to\"]').value",
|
||
"required_contents": {
|
||
"exact_match": "3/1/23"
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 271,
|
||
"old_task_id": 710
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 151,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.",
|
||
"instantiation_dict": {
|
||
"subreddit": "gadgets",
|
||
"k": "1"
|
||
},
|
||
"intent": "Thumbs down the top 1 post ever in gadgets.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__REDDIT__/f/gadgets/19459/a-custom-gaming-pc-built-inside-a-vintage-1940s-motorola",
|
||
"locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"vote vote--user-downvoted"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 24,
|
||
"old_task_id": 714
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 152,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}",
|
||
"instantiation_dict": {
|
||
"subreddit": "earthporn",
|
||
"user": "CameronKelsey"
|
||
},
|
||
"intent": "Like all submissions created by CameronKelsey in subreddit earthporn",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__REDDIT__/f/EarthPorn/98332/my-favorite-place-on-the-planet-henry-s-fork-of-the-snake",
|
||
"locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"vote vote--user-upvoted"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "__REDDIT__/f/EarthPorn/98297/2-years-later-this-is-still-one-of-the-most-incredible",
|
||
"locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"vote vote--user-upvoted"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "__REDDIT__/f/EarthPorn/98256/i-can-t-wait-for-all-this-green-to-start-coming-back-little",
|
||
"locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"vote vote--user-upvoted"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 25,
|
||
"old_task_id": 720
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 153,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}",
|
||
"instantiation_dict": {
|
||
"user": "AdamCannon",
|
||
"subreddit": "UpliftingNews"
|
||
},
|
||
"intent": "DisLike all submissions created by AdamCannon in subreddit UpliftingNews",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__REDDIT__/f/UpliftingNews/16087/same-sex-marriage-is-now-legal-in-all-of-mexico-s-states",
|
||
"locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"vote vote--user-downvoted"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 1510,
|
||
"old_task_id": 729
|
||
},
|
||
{
|
||
"sites": [
|
||
"reddit"
|
||
],
|
||
"task_id": 154,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/reddit_state.json",
|
||
"start_url": "__REDDIT__",
|
||
"geolocation": null,
|
||
"intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"",
|
||
"instantiation_dict": {
|
||
"post": "Star Trek Starfleet Academy series",
|
||
"content": "Every watch makes me feel like a kid again"
|
||
},
|
||
"intent": "Edit my post on Star Trek Starfleet Academy series by adding a line to the body that says \"Every watch makes me feel like a kid again\"",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__REDDIT__/f/television/135201/star-trek-starfleet-academy-series-from-alex-kurtzman-and",
|
||
"locator": "document.querySelector('.submission__body').outerText",
|
||
"required_contents": {
|
||
"exact_match": "Every watch makes me feel like a kid again"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 27,
|
||
"old_task_id": 733
|
||
},
|
||
{
|
||
"sites": [
|
||
"wikipedia",
|
||
"map"
|
||
],
|
||
"task_id": 155,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}",
|
||
"instantiation_dict": {
|
||
"location": "Carnegie Mellon University",
|
||
"sport_team": "Boston home NBA team",
|
||
"time": ""
|
||
},
|
||
"intent": "Show me the way from Carnegie Mellon University to the home stadium of Boston home NBA team ",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[name=\"route_from\"]').value",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Carnegie Mellon University",
|
||
"Pittsburgh"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[name=\"route_to\"]').value",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"TD Garden",
|
||
"Boston",
|
||
"Massachusetts"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex",
|
||
"required_contents": {
|
||
"exact_match": "1"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 94,
|
||
"old_task_id": 741
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 156,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Create a new {{scope}} project \"awesome-llms\" and add {{account_list}} as members",
|
||
"instantiation_dict": {
|
||
"scope": "public",
|
||
"account_list": "primer, convexegg, abishek"
|
||
},
|
||
"intent": "Create a new public project \"awesome-llms\" and add primer, convexegg, abishek as members",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__GITLAB__/byteblaze/awesome-llms",
|
||
"locator": "document.querySelector('.visibility-icon').getAttribute('title')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"public"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "__GITLAB__/byteblaze/awesome-llms/-/project_members",
|
||
"locator": "",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"@primer",
|
||
"@convexegg",
|
||
"@abisubramanya27"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 332,
|
||
"old_task_id": 745
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 157,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members",
|
||
"instantiation_dict": {
|
||
"project_name": "web_agent_android_xl",
|
||
"template": "Android",
|
||
"account_list": "primer, convexegg, abishek"
|
||
},
|
||
"intent": "Start a private project web_agent_android_xl with Android template and add primer, convexegg, abishek as members",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__GITLAB__/byteblaze/web_agent_android_xl",
|
||
"locator": "document.querySelector('.visibility-icon').getAttribute('title')",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Private"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "__GITLAB__/byteblaze/web_agent_android_xl/-/commits",
|
||
"locator": "",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Initialized from 'Android' project template"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "__GITLAB__/byteblaze/web_agent_android_xl/-/project_members",
|
||
"locator": "",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"@primer",
|
||
"@convexegg",
|
||
"@abisubramanya27"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 2100,
|
||
"old_task_id": 748
|
||
},
|
||
{
|
||
"sites": [
|
||
"map",
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 158,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Show me the route and driving time from {{city1}} to {{city2}}",
|
||
"instantiation_dict": {
|
||
"city1": "Allentown, PA",
|
||
"city2": "the city where my E-commerce customer Amanda Kim lives"
|
||
},
|
||
"intent": "Show me the route and driving time from Allentown, PA to the city where my E-commerce customer Amanda Kim lives",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex",
|
||
"required_contents": {
|
||
"exact_match": "1"
|
||
}
|
||
},
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[name=\"route_from\"]').value",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Allentown"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[name=\"route_to\"]').value",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Hoboken",
|
||
"New Jersey"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 42,
|
||
"old_task_id": 760
|
||
},
|
||
{
|
||
"sites": [
|
||
"map"
|
||
],
|
||
"task_id": 159,
|
||
"require_login": true,
|
||
"storage_state": null,
|
||
"start_url": "__MAP__",
|
||
"geolocation": null,
|
||
"intent_template": "Get directions from {{location/address_1}} to {{location/address_2}} using {{transportation}} options.",
|
||
"instantiation_dict": {
|
||
"location/address_1": "Carnegie Music Hall in NYC",
|
||
"location/address_2": "Carnegie Mellon University",
|
||
"transportation": "driving"
|
||
},
|
||
"intent": "Get directions from Carnegie Music Hall in NYC to Carnegie Mellon University using driving options.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex",
|
||
"required_contents": {
|
||
"exact_match": "1"
|
||
}
|
||
},
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[name=\"route_from\"]').value",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Carnegie Hall",
|
||
"West 57th Street",
|
||
"Manhattan",
|
||
"New York"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[name=\"route_to\"').value",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Carnegie Mellon University",
|
||
"Pittsburgh"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 54,
|
||
"old_task_id": 762
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping_admin"
|
||
],
|
||
"task_id": 160,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_admin_state.json",
|
||
"start_url": "__SHOPPING_ADMIN__",
|
||
"geolocation": null,
|
||
"intent_template": "{{quantity}} {{product}} arrived, update the stock",
|
||
"instantiation_dict": {
|
||
"quantity": "5",
|
||
"product": "blue Cronus yoga pants with size 33"
|
||
},
|
||
"intent": "5 blue Cronus yoga pants with size 33 arrived, update the stock",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__SHOPPING_ADMIN__/catalog/product/edit/id/872/",
|
||
"locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value",
|
||
"required_contents": {
|
||
"exact_match": "5"
|
||
}
|
||
},
|
||
{
|
||
"url": "__SHOPPING_ADMIN__/catalog/product/edit/id/872/",
|
||
"locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value",
|
||
"required_contents": {
|
||
"exact_match": "1"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 241,
|
||
"old_task_id": 768
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab",
|
||
"reddit"
|
||
],
|
||
"task_id": 161,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab.reddit_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Make a folder named {{directory}} on the {{gitlab_repo}} repo and include a file called urls.txt that consists of the links to the 5 most recent posts from {{subreddit}}.",
|
||
"instantiation_dict": {
|
||
"directory": "car",
|
||
"subreddit": "cars",
|
||
"gitlab_repo": "gimmiethat.space"
|
||
},
|
||
"intent": "Make a folder named car on the gimmiethat.space repo and include a file called urls.txt that consists of the links to the 5 most recent posts from cars.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"exact_match": "N/A"
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "the subreddit does not exist",
|
||
"reference_answer_raw_annotation": "N/A"
|
||
},
|
||
"intent_template_id": 84,
|
||
"old_task_id": 791
|
||
},
|
||
{
|
||
"sites": [
|
||
"shopping"
|
||
],
|
||
"task_id": 162,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/shopping_state.json",
|
||
"start_url": "__SHOPPING__",
|
||
"geolocation": null,
|
||
"intent_template": "Change the delivery address for my most recent order to {{address}}.",
|
||
"instantiation_dict": {
|
||
"address": "77 Massachusetts Ave, Cambridge, MA"
|
||
},
|
||
"intent": "Change the delivery address for my most recent order to 77 Massachusetts Ave, Cambridge, MA.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"string_match"
|
||
],
|
||
"reference_answers": {
|
||
"must_include": [
|
||
"N/A"
|
||
]
|
||
},
|
||
"reference_url": "",
|
||
"program_html": [],
|
||
"string_note": "The shipping address cannot be changed after an order is placed in the website.",
|
||
"reference_answer_raw_annotation": "N/A"
|
||
},
|
||
"intent_template_id": 191,
|
||
"old_task_id": 798
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 163,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}",
|
||
"instantiation_dict": {
|
||
"repo": "a11yproject",
|
||
"issue": "404 for many URLs",
|
||
"account": "myself",
|
||
"due": "2030-1-3"
|
||
},
|
||
"intent": "Create an issue in a11yproject repo with title \"404 for many URLs\". Assign the issue to myself. Set due date to be 2030-1-3",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"url_match",
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues",
|
||
"program_html": [
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[data-qa-selector=\"title_content\"]').outerText",
|
||
"required_contents": {
|
||
"exact_match": "404 for many URLs"
|
||
}
|
||
},
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Jan 3, 2030"
|
||
]
|
||
}
|
||
},
|
||
{
|
||
"url": "last",
|
||
"locator": "document.querySelector('.block.assignee').outerText",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"Byte Blaze"
|
||
]
|
||
}
|
||
}
|
||
],
|
||
"url_note": "GOLD in PRED"
|
||
},
|
||
"intent_template_id": 327,
|
||
"old_task_id": 809
|
||
},
|
||
{
|
||
"sites": [
|
||
"gitlab"
|
||
],
|
||
"task_id": 164,
|
||
"require_login": true,
|
||
"storage_state": "./.auth/gitlab_state.json",
|
||
"start_url": "__GITLAB__",
|
||
"geolocation": null,
|
||
"intent_template": "Assign the issue regarding {{issue}} in {{repo}} to {{account}}.",
|
||
"instantiation_dict": {
|
||
"repo": "a11yproject",
|
||
"issue": 404,
|
||
"account": "myself"
|
||
},
|
||
"intent": "Assign the issue regarding 404 in a11yproject to myself.",
|
||
"require_reset": false,
|
||
"eval": {
|
||
"eval_types": [
|
||
"program_html"
|
||
],
|
||
"reference_answers": null,
|
||
"reference_url": "",
|
||
"program_html": [
|
||
{
|
||
"url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=byteblaze",
|
||
"locator": "",
|
||
"required_contents": {
|
||
"must_include": [
|
||
"404s, bad host, timeouts, bad urls for URLs linked from website"
|
||
]
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"intent_template_id": 999,
|
||
"old_task_id": 811
|
||
}
|
||
] |