diff --git a/.gitignore b/.gitignore index 5f05017..98dc5ff 100644 --- a/.gitignore +++ b/.gitignore @@ -25,4 +25,12 @@ data/minecraft src/server/tasks/minecraft/vab_minecraft_src/jarvis/stark_tech/MCP-Reborn src/server/tasks/minecraft/vab_minecraft_src/jarvis/steveI/weights src/server/tasks/minecraft/vab_minecraft_src/jarvis/global_configs/envs/* -!src/server/tasks/minecraft/vab_minecraft_src/jarvis/global_configs/envs/jarvis.yaml \ No newline at end of file +!src/server/tasks/minecraft/vab_minecraft_src/jarvis/global_configs/envs/jarvis.yaml + +VAB-WebArena-Lite/config_files/wa/test_webarena +VAB-WebArena-Lite/config_files/wa/test_webarena_lite + +VAB-WebArena-Lite/log_files + +VAB-WebArena-Lite/results + diff --git a/VAB-WebArena-Lite/.auth/gitlab.reddit_state.json b/VAB-WebArena-Lite/.auth/gitlab.reddit_state.json new file mode 100644 index 0000000..84d2f13 --- /dev/null +++ b/VAB-WebArena-Lite/.auth/gitlab.reddit_state.json @@ -0,0 +1 @@ +{"cookies": [{"name": "_cookie_check", "value": "1", "domain": "localhost", "path": "/login", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "REMEMBERME", "value": "QXBwXEVudGl0eVxVc2VyOlRXRnlkbVZzYzBkeVlXNTBUV0Z1TVRNMjoxNzc2OTA5MzI5OlltVTFZalF5Tldaa05qaG1Oamt6WWpJMllXRTVOVEE0TW1GaFl6ZGpaV0V4WVdFMk1HSmlNamsyTjJVd01qVXdaakkwTVRSaVlURTNNRGhpTVROa1lnPT0%3D", "domain": "localhost", "path": "/", "expires": 1776909329.082441, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "PHPSESSID", "value": "c002205f152b213deaedc0800c372f3f", "domain": "localhost", "path": "/", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "known_sign_in", "value": "UlVrNWNuN1g3cDRWdjNiUmJZUVl3OGNZNmJGQmlhYll6WFN0ZDdXNFpaZnBWNk1VLzBFKyt0eDJWQ2F1VkhkUndNR1hjeHIybnNzYlY5RC8wbFVWejJrclpUSUZZR2g1T0ZJQTRVVlZMUGVjKzdiMmhWVk1yR2czb1pzYXAzQWstLW8vYnlFdTlMYmZHUXBTZENLRDVBNlE9PQ%3D%3D--4b1888d4ebbdb533cf162356c2630ea53dd95286", "domain": "localhost", "path": "/", "expires": 
1746582930.098554, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "_gitlab_session", "value": "037c2d46106a9cea55da705dce5b5708", "domain": "localhost", "path": "/", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": []} \ No newline at end of file diff --git a/VAB-WebArena-Lite/.auth/gitlab.shopping_admin_state.json b/VAB-WebArena-Lite/.auth/gitlab.shopping_admin_state.json new file mode 100644 index 0000000..286e6d4 --- /dev/null +++ b/VAB-WebArena-Lite/.auth/gitlab.shopping_admin_state.json @@ -0,0 +1 @@ +{"cookies": [{"name": "admin", "value": "1087c582d275f78834da637c08d2dd0c", "domain": "localhost", "path": "/admin", "expires": 1745733329.394644, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "known_sign_in", "value": "UW1ZRyt2cTd2VmltKzMzWGVDOWovVi9HUkwyU2lYQ3BteUdGNUFyNHpNNHpMVldsWEdTZnJ6TDM5ekpxaDFpSTNGTXAvWTBDOGtWRUp6YWlWbE5UdEJXZzM5a0FvUGQ2V0c5dHJib1J6b0VrNWhVQTgvOTZxTnVtd3NHMjBWQTktLXJub3VnR0RISHFkV1Y2QmtzRlg3Nnc9PQ%3D%3D--20fe4b8ba28a2bb065bc6691d0cc2733e25633c4", "domain": "localhost", "path": "/", "expires": 1746582930.401271, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "_gitlab_session", "value": "f98c1667e8eacdb4484308adbc9212fd", "domain": "localhost", "path": "/", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": []} \ No newline at end of file diff --git a/VAB-WebArena-Lite/.auth/gitlab.shopping_state.json b/VAB-WebArena-Lite/.auth/gitlab.shopping_state.json new file mode 100644 index 0000000..89a1d17 --- /dev/null +++ b/VAB-WebArena-Lite/.auth/gitlab.shopping_state.json @@ -0,0 +1 @@ +{"cookies": [{"name": "mage-cache-storage", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "mage-cache-storage-section-invalidation", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": 
"Lax"}, {"name": "mage-messages", "value": "", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Strict"}, {"name": "recently_viewed_product", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_viewed_product_previous", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_compared_product", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_compared_product_previous", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "product_data_storage", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "section_data_ids", "value": "{%22messages%22:null%2C%22customer%22:null%2C%22compare-products%22:null%2C%22last-ordered-items%22:null%2C%22cart%22:null%2C%22directory-data%22:null%2C%22captcha%22:null%2C%22instant-purchase%22:null%2C%22loggedAsCustomer%22:null%2C%22persistent%22:null%2C%22review%22:null%2C%22wishlist%22:null%2C%22recently_viewed_product%22:null%2C%22recently_compared_product%22:null%2C%22product_data_storage%22:null%2C%22paypal-billing-agreement%22:null}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "private_content_version", "value": "5a1361e59223072bebd2793394207496", "domain": "localhost", "path": "/", "expires": 1779933329.571442, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "PHPSESSID", "value": "177a7b68734a2083018b3fb119634917", "domain": "localhost", "path": "/", "expires": 1776909330.114376, "httpOnly": true, "secure": false, "sameSite": 
"Lax"}, {"name": "X-Magento-Vary", "value": "9bf9a599123e6402b85cde67144717a08b817412", "domain": "localhost", "path": "/", "expires": 1776909330.114451, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "known_sign_in", "value": "VjEvQms0R1Y0WFJwNzhWcXJmWUQzTlM5aEZhS1FLd3pIaGcrRHZoUUMxb2RtK2gzWFVMK1N1YjA0QTJiV1NPL1NwQWYrc3dpRlU3U3Q1Q3p4T0RNbWd4UVl2Vk5pR2dSbVBFTUhrQUhQMnBJOXlxaGVsdG5ycXFkVDQ2dDBBejYtLWNYZ3pJREcxbkVXWjVsZE9LMnIvZkE9PQ%3D%3D--860cb1881ab774ae0647d031b6a58789ca2dc43f", "domain": "localhost", "path": "/", "expires": 1746582931.071908, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "_gitlab_session", "value": "23ff9045fd7ed259aa91a5056469be23", "domain": "localhost", "path": "/", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": [{"origin": "http://localhost:28082", "localStorage": [{"name": "mage-cache-storage", "value": "{}"}, {"name": "product_data_storage", "value": "{}"}, {"name": "mage-cache-storage-section-invalidation", "value": "{\"messages\":true,\"customer\":true,\"compare-products\":true,\"last-ordered-items\":true,\"cart\":true,\"directory-data\":true,\"captcha\":true,\"instant-purchase\":true,\"loggedAsCustomer\":true,\"persistent\":true,\"review\":true,\"wishlist\":true,\"recently_viewed_product\":true,\"recently_compared_product\":true,\"product_data_storage\":true,\"paypal-billing-agreement\":true}"}, {"name": "mage-cache-timeout", "value": "\"2026-04-23T01:55:28.860Z\""}, {"name": "recently_compared_product_previous", "value": "{}"}, {"name": "recently_viewed_product", "value": "{}"}, {"name": "recently_compared_product", "value": "{}"}, {"name": "recently_viewed_product_previous", "value": "{}"}]}]} \ No newline at end of file diff --git a/VAB-WebArena-Lite/.auth/gitlab_state.json b/VAB-WebArena-Lite/.auth/gitlab_state.json new file mode 100644 index 0000000..56a4934 --- /dev/null +++ b/VAB-WebArena-Lite/.auth/gitlab_state.json @@ -0,0 +1 @@ +{"cookies": [{"name": 
"known_sign_in", "value": "WHpKbmQyNkhlWWhHcGljQmZmVGE2aFl0WFh3VWxGMTRRYjBsa3dNbVJZLzIrYWM5RUpwa0IxYU85Q291WTE5cE9SWVcvdC9xTjNBTFp2ZHdCTGxaaEx6RXpDVm0zS1BCVldyODdMYlZuTWRHNFF0K0JQZXVqcGhPWWVkMWZWMmktLVNCc3F6VlFFQjBFSGN0TnlKUnd2MEE9PQ%3D%3D--752f2d15a69617f851d3140acb5500b2564af08f", "domain": "localhost", "path": "/", "expires": 1746582928.781901, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "_gitlab_session", "value": "85d65f4d47be2142eee2a21e8bbcec79", "domain": "localhost", "path": "/", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": []} \ No newline at end of file diff --git a/VAB-WebArena-Lite/.auth/reddit_state.json b/VAB-WebArena-Lite/.auth/reddit_state.json new file mode 100644 index 0000000..179360f --- /dev/null +++ b/VAB-WebArena-Lite/.auth/reddit_state.json @@ -0,0 +1 @@ +{"cookies": [{"name": "_cookie_check", "value": "1", "domain": "localhost", "path": "/login", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "REMEMBERME", "value": "QXBwXEVudGl0eVxVc2VyOlRXRnlkbVZzYzBkeVlXNTBUV0Z1TVRNMjoxNzc2OTA5MzI4OlpUQTRNV1F3WldNMU1HSmhOalUwTTJaaE1qWmxOVGs1TldNMlpUVTFNVEUyT1RRNFpqTXdaR1F3TVRRelpUTXhPVEl3TjJSa09USm1PREF4Wm1WaE5nPT0%3D", "domain": "localhost", "path": "/", "expires": 1776909328.956723, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "PHPSESSID", "value": "4a07cf057d5020f791451af6c2bc29e0", "domain": "localhost", "path": "/", "expires": -1, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": []} \ No newline at end of file diff --git a/VAB-WebArena-Lite/.auth/shopping.shopping_admin_state.json b/VAB-WebArena-Lite/.auth/shopping.shopping_admin_state.json new file mode 100644 index 0000000..6fc9a81 --- /dev/null +++ b/VAB-WebArena-Lite/.auth/shopping.shopping_admin_state.json @@ -0,0 +1 @@ +{"cookies": [{"name": "admin", "value": "732e6b7c1a7ab1de0d1a4ab105b549b6", "domain": "localhost", "path": "/admin", "expires": 1745733331.781848, 
"httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "mage-cache-storage", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "mage-cache-storage-section-invalidation", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "mage-messages", "value": "", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Strict"}, {"name": "recently_viewed_product", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_viewed_product_previous", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_compared_product", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_compared_product_previous", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "product_data_storage", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "section_data_ids", "value": "{%22messages%22:null%2C%22customer%22:null%2C%22compare-products%22:null%2C%22last-ordered-items%22:null%2C%22cart%22:null%2C%22directory-data%22:null%2C%22captcha%22:null%2C%22instant-purchase%22:null%2C%22loggedAsCustomer%22:null%2C%22persistent%22:null%2C%22review%22:null%2C%22wishlist%22:null%2C%22recently_viewed_product%22:null%2C%22recently_compared_product%22:null%2C%22product_data_storage%22:null%2C%22paypal-billing-agreement%22:null}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": 
"Lax"}, {"name": "private_content_version", "value": "c694b83ea1426b503effa8c412f7ae6d", "domain": "localhost", "path": "/", "expires": 1779933329.584947, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "PHPSESSID", "value": "ac8e0a869234bb4b31a917ff6a016ce9", "domain": "localhost", "path": "/", "expires": 1776909330.149523, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "X-Magento-Vary", "value": "9bf9a599123e6402b85cde67144717a08b817412", "domain": "localhost", "path": "/", "expires": 1776909330.149613, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": [{"origin": "http://localhost:28082", "localStorage": [{"name": "mage-cache-storage", "value": "{}"}, {"name": "product_data_storage", "value": "{}"}, {"name": "mage-cache-storage-section-invalidation", "value": "{\"messages\":true,\"customer\":true,\"compare-products\":true,\"last-ordered-items\":true,\"cart\":true,\"directory-data\":true,\"captcha\":true,\"instant-purchase\":true,\"loggedAsCustomer\":true,\"persistent\":true,\"review\":true,\"wishlist\":true,\"recently_viewed_product\":true,\"recently_compared_product\":true,\"product_data_storage\":true,\"paypal-billing-agreement\":true}"}, {"name": "mage-cache-timeout", "value": "\"2026-04-23T01:55:28.879Z\""}, {"name": "recently_compared_product_previous", "value": "{}"}, {"name": "recently_viewed_product", "value": "{}"}, {"name": "recently_compared_product", "value": "{}"}, {"name": "recently_viewed_product_previous", "value": "{}"}]}]} \ No newline at end of file diff --git a/VAB-WebArena-Lite/.auth/shopping_admin_state.json b/VAB-WebArena-Lite/.auth/shopping_admin_state.json new file mode 100644 index 0000000..82cda7c --- /dev/null +++ b/VAB-WebArena-Lite/.auth/shopping_admin_state.json @@ -0,0 +1 @@ +{"cookies": [{"name": "admin", "value": "753cf7bedc7f97223f77af0369ec4ccf", "domain": "localhost", "path": "/admin", "expires": 1745733329.396607, "httpOnly": true, "secure": false, "sameSite": "Lax"}], 
"origins": []} \ No newline at end of file diff --git a/VAB-WebArena-Lite/.auth/shopping_state.json b/VAB-WebArena-Lite/.auth/shopping_state.json new file mode 100644 index 0000000..990a365 --- /dev/null +++ b/VAB-WebArena-Lite/.auth/shopping_state.json @@ -0,0 +1 @@ +{"cookies": [{"name": "mage-cache-storage", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "mage-cache-storage-section-invalidation", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "mage-messages", "value": "", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Strict"}, {"name": "recently_viewed_product", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_viewed_product_previous", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_compared_product", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "recently_compared_product_previous", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "product_data_storage", "value": "{}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "section_data_ids", "value": 
"{%22messages%22:null%2C%22customer%22:null%2C%22compare-products%22:null%2C%22last-ordered-items%22:null%2C%22cart%22:null%2C%22directory-data%22:null%2C%22captcha%22:null%2C%22instant-purchase%22:null%2C%22loggedAsCustomer%22:null%2C%22persistent%22:null%2C%22review%22:null%2C%22wishlist%22:null%2C%22recently_viewed_product%22:null%2C%22recently_compared_product%22:null%2C%22product_data_storage%22:null%2C%22paypal-billing-agreement%22:null}", "domain": "localhost", "path": "/", "expires": 1776909328, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "private_content_version", "value": "db7c8357df43bdda4423b1d0be6cd1c0", "domain": "localhost", "path": "/", "expires": 1779933329.561383, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "PHPSESSID", "value": "9d34565b19e00a4310aba625db3a7fd7", "domain": "localhost", "path": "/", "expires": 1776909330.076342, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "X-Magento-Vary", "value": "9bf9a599123e6402b85cde67144717a08b817412", "domain": "localhost", "path": "/", "expires": 1776909330.076442, "httpOnly": true, "secure": false, "sameSite": "Lax"}], "origins": [{"origin": "http://localhost:28082", "localStorage": [{"name": "mage-cache-storage", "value": "{}"}, {"name": "product_data_storage", "value": "{}"}, {"name": "mage-cache-storage-section-invalidation", "value": "{\"messages\":true,\"customer\":true,\"compare-products\":true,\"last-ordered-items\":true,\"cart\":true,\"directory-data\":true,\"captcha\":true,\"instant-purchase\":true,\"loggedAsCustomer\":true,\"persistent\":true,\"review\":true,\"wishlist\":true,\"recently_viewed_product\":true,\"recently_compared_product\":true,\"product_data_storage\":true,\"paypal-billing-agreement\":true}"}, {"name": "mage-cache-timeout", "value": "\"2026-04-23T01:55:28.896Z\""}, {"name": "recently_compared_product_previous", "value": "{}"}, {"name": "recently_viewed_product", "value": "{}"}, {"name": "recently_compared_product", 
"value": "{}"}, {"name": "recently_viewed_product_previous", "value": "{}"}]}]} \ No newline at end of file diff --git a/VAB-WebArena-Lite/CITATION.cff b/VAB-WebArena-Lite/CITATION.cff new file mode 100644 index 0000000..f5cea1b --- /dev/null +++ b/VAB-WebArena-Lite/CITATION.cff @@ -0,0 +1,6 @@ +@article{koh2024visualwebarena, + title={VisualWebArena: Evaluating Multimodal Agents on Realistic Visual Web Tasks}, + author={Koh, Jing Yu and Lo, Robert and Jang, Lawrence and Duvvur, Vikram and Lim, Ming Chong and Huang, Po-Yu and Neubig, Graham and Zhou, Shuyan and Salakhutdinov, Ruslan and Fried, Daniel}, + journal={arXiv preprint arXiv:24xx.xxxxx}, + year={2024} +} diff --git a/VAB-WebArena-Lite/LICENSE b/VAB-WebArena-Lite/LICENSE new file mode 100644 index 0000000..75e3041 --- /dev/null +++ b/VAB-WebArena-Lite/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2024 Jing Yu Koh, Robert Lo, Lawrence Jang, Vikram Duvvur, Ming Chong Lim, and Po-Yu Huang + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/VAB-WebArena-Lite/agent/__init__.py b/VAB-WebArena-Lite/agent/__init__.py new file mode 100644 index 0000000..9028d30 --- /dev/null +++ b/VAB-WebArena-Lite/agent/__init__.py @@ -0,0 +1,8 @@ +from .agent import ( + Agent, + PromptAgent, + TeacherForcingAgent, + construct_agent, +) + +__all__ = ["Agent", "TeacherForcingAgent", "PromptAgent", "construct_agent"] diff --git a/VAB-WebArena-Lite/new/agent.py b/VAB-WebArena-Lite/agent/agent.py similarity index 100% rename from VAB-WebArena-Lite/new/agent.py rename to VAB-WebArena-Lite/agent/agent.py diff --git a/VAB-WebArena-Lite/agent/prompts/__init__.py b/VAB-WebArena-Lite/agent/prompts/__init__.py new file mode 100644 index 0000000..3f3caba --- /dev/null +++ b/VAB-WebArena-Lite/agent/prompts/__init__.py @@ -0,0 +1 @@ +from .prompt_constructor import * diff --git a/VAB-WebArena-Lite/agent/prompts/jsons/p_cot_id_actree_2s.json b/VAB-WebArena-Lite/agent/prompts/jsons/p_cot_id_actree_2s.json new file mode 100644 index 0000000..9d2eae4 --- /dev/null +++ b/VAB-WebArena-Lite/agent/prompts/jsons/p_cot_id_actree_2s.json @@ -0,0 +1,27 @@ +{ + "intro": "You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.\n\nHere's the information you'll have:\nThe user's objective: This is the task you're trying to complete.\nThe current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.\nThe current web page's URL: This is the page you're currently navigating.\nThe open tabs: These are the tabs you have open.\nThe previous action: This is the action you just performed. 
It may be helpful to track your progress.\n\nThe actions you can perform fall into several categories:\n\nPage Operation Actions:\n`click [id]`: This action clicks on an element with a specific id on the webpage.\n`type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the \"Enter\" key is pressed after typing unless press_enter_after is set to 0.\n`hover [id]`: Hover over an element with id.\n`press [key_comb]`: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).\n`scroll [direction=down|up]`: Scroll the page up or down.\n\nTab Management Actions:\n`new_tab`: Open a new, empty browser tab.\n`tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index.\n`close_tab`: Close the currently active tab.\n\nURL Navigation Actions:\n`goto [url]`: Navigate to a specific URL.\n`go_back`: Navigate to the previously viewed page.\n`go_forward`: Navigate to the next page (if a previous 'go_back' action was performed).\n\nCompletion Action:\n`stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket. If you believe the task is impossible to complete, provide the answer as \"N/A\" in the bracket.\n\nHomepage:\nIf you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.\nhttp://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.\n\nTo be successful, it is very important to follow the following rules:\n1. You should only issue an action that is valid given the current observation\n2. You should only issue one action at a time.\n3. You should follow the examples to reason step by step and then issue the next action.\n4. Generate the action in the correct format. 
Start with a \"In summary, the next action I will perform is\" phrase, followed by action inside ``````. For example, \"In summary, the next action I will perform is ```click [1234]```\".\n5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.", + "examples": [ + [ + "OBSERVATION:\n[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'\n\t\t[1749] StaticText '$279.49'\n\t\t[1757] button 'Add to Cart'\n\t\t[1760] button 'Add to Wish List'\n\t\t[1761] button 'Add to Compare'\nURL: http://onestopmarket.com/office-products/office-electronics.html\nOBJECTIVE: What is the price of HP Inkjet Fax Machine\nPREVIOUS ACTION: None", + "Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```" + ], + [ + "OBSERVATION:\n[164] textbox 'Search' focused: True required: False\n[171] button 'Go'\n[174] link 'Find directions between two points'\n[212] heading 'Search Results'\n[216] button 'Close'\nURL: http://openstreetmap.org\nOBJECTIVE: Show me the restaurants near CMU\nPREVIOUS ACTION: None", + "Let's think step-by-step. This page has a search box whose ID is [164]. According to the nominatim rule of openstreetmap, I can search for the restaurants near a location by \"restaurants near\". I can submit my typing by pressing the Enter afterwards. 
In summary, the next action I will perform is ```type [164] [restaurants near CMU] [1]```" + ] + ], + "template": "OBSERVATION:\n{observation}\nURL: {url}\nOBJECTIVE: {objective}\nPREVIOUS ACTION: {previous_action}", + "meta_data": { + "observation": "accessibility_tree", + "action_type": "id_accessibility_tree", + "keywords": [ + "url", + "objective", + "observation", + "previous_action" + ], + "prompt_constructor": "CoTPromptConstructor", + "answer_phrase": "In summary, the next action I will perform is", + "action_splitter": "```" + } +} \ No newline at end of file diff --git a/VAB-WebArena-Lite/agent/prompts/jsons/p_cot_id_actree_2s_no_na.json b/VAB-WebArena-Lite/agent/prompts/jsons/p_cot_id_actree_2s_no_na.json new file mode 100644 index 0000000..6b0f23f --- /dev/null +++ b/VAB-WebArena-Lite/agent/prompts/jsons/p_cot_id_actree_2s_no_na.json @@ -0,0 +1,27 @@ +{ + "intro": "You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.\n\nHere's the information you'll have:\nThe user's objective: This is the task you're trying to complete.\nThe current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.\nThe current web page's URL: This is the page you're currently navigating.\nThe open tabs: These are the tabs you have open.\nThe previous action: This is the action you just performed. It may be helpful to track your progress.\n\nThe actions you can perform fall into several categories:\n\nPage Operation Actions:\n`click [id]`: This action clicks on an element with a specific id on the webpage.\n`type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. 
By default, the \"Enter\" key is pressed after typing unless press_enter_after is set to 0.\n`hover [id]`: Hover over an element with id.\n`press [key_comb]`: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).\n`scroll [direction=down|up]`: Scroll the page up or down.\n\nTab Management Actions:\n`new_tab`: Open a new, empty browser tab.\n`tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index.\n`close_tab`: Close the currently active tab.\n\nURL Navigation Actions:\n`goto [url]`: Navigate to a specific URL.\n`go_back`: Navigate to the previously viewed page.\n`go_forward`: Navigate to the next page (if a previous 'go_back' action was performed).\n\nCompletion Action:\n`stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.\n\nHomepage:\nIf you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.\nhttp://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.\n\nTo be successful, it is very important to follow the following rules:\n1. You should only issue an action that is valid given the current observation\n2. You should only issue one action at a time.\n3. You should follow the examples to reason step by step and then issue the next action.\n4. Generate the action in the correct format. Start with a \"In summary, the next action I will perform is\" phrase, followed by action inside ``````. For example, \"In summary, the next action I will perform is ```click [1234]```\".\n5. Issue stop action when you think you have achieved the objective. 
Don't generate anything after stop.", + "examples": [ + [ + "OBSERVATION:\n[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'\n\t\t[1749] StaticText '$279.49'\n\t\t[1757] button 'Add to Cart'\n\t\t[1760] button 'Add to Wish List'\n\t\t[1761] button 'Add to Compare'\nURL: http://onestopmarket.com/office-products/office-electronics.html\nOBJECTIVE: What is the price of HP Inkjet Fax Machine\nPREVIOUS ACTION: None", + "Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```" + ], + [ + "OBSERVATION:\n[164] textbox 'Search' focused: True required: False\n[171] button 'Go'\n[174] link 'Find directions between two points'\n[212] heading 'Search Results'\n[216] button 'Close'\nURL: http://openstreetmap.org\nOBJECTIVE: Show me the restaurants near CMU\nPREVIOUS ACTION: None", + "Let's think step-by-step. This page has a search box whose ID is [164]. According to the nominatim rule of openstreetmap, I can search for the restaurants near a location by \"restaurants near\". I can submit my typing by pressing the Enter afterwards. 
In summary, the next action I will perform is ```type [164] [restaurants near CMU] [1]```" + ] + ], + "template": "OBSERVATION:\n{observation}\nURL: {url}\nOBJECTIVE: {objective}\nPREVIOUS ACTION: {previous_action}", + "meta_data": { + "observation": "accessibility_tree", + "action_type": "id_accessibility_tree", + "keywords": [ + "url", + "objective", + "observation", + "previous_action" + ], + "prompt_constructor": "CoTPromptConstructor", + "answer_phrase": "In summary, the next action I will perform is", + "action_splitter": "```" + } +} \ No newline at end of file diff --git a/VAB-WebArena-Lite/agent/prompts/jsons/p_cot_id_actree_3s.json b/VAB-WebArena-Lite/agent/prompts/jsons/p_cot_id_actree_3s.json new file mode 100644 index 0000000..d2453ca --- /dev/null +++ b/VAB-WebArena-Lite/agent/prompts/jsons/p_cot_id_actree_3s.json @@ -0,0 +1,31 @@ +{ + "intro": "You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.\n\nHere's the information you'll have:\nThe user's objective: This is the task you're trying to complete.\nThe current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.\nThe current web page's URL: This is the page you're currently navigating.\nThe open tabs: These are the tabs you have open.\nThe previous action: This is the action you just performed. It may be helpful to track your progress.\n\nThe actions you can perform fall into several categories:\n\nPage Operation Actions:\n```click [id]```: This action clicks on an element with a specific id on the webpage.\n```type [id] [content]```: Use this to type the content into the field with id. 
By default, the \"Enter\" key is pressed after typing unless press_enter_after is set to 0, i.e., ```type [id] [content] [0]```.\n```hover [id]```: Hover over an element with id.\n```press [key_comb]```: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).\n```scroll [down]``` or ```scroll [up]```: Scroll the page up or down.\n\nTab Management Actions:\n```new_tab```: Open a new, empty browser tab.\n```tab_focus [tab_index]```: Switch the browser's focus to a specific tab using its index.\n```close_tab```: Close the currently active tab.\n\nURL Navigation Actions:\n```goto [url]```: Navigate to a specific URL.\n```go_back```: Navigate to the previously viewed page.\n```go_forward```: Navigate to the next page (if a previous 'go_back' action was performed).\n\nCompletion Action:\n```stop [answer]```: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.\n\nHomepage:\nIf you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.\nhttp://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.\n\nTo be successful, it is very important to follow the following rules:\n1. You should only issue an action that is valid given the current observation\n2. You should only issue one action at a time.\n3. You should follow the examples to reason step by step and then issue the next action.\n4. Generate the action in the correct format. Start with a \"In summary, the next action I will perform is\" phrase, followed by action inside ``````. For example, \"In summary, the next action I will perform is ```click [1234]```\".\n5. Issue stop action when you think you have achieved the objective. 
Don't generate anything after stop.", + "examples": [ + [ + "OBSERVATION:\n[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'\n[1749] StaticText '$279.49'\n[1757] button 'Add to Cart'\n[1760] button 'Add to Wish List'\n[1761] button 'Add to Compare'\nURL: http://onestopmarket.com/office-products/office-electronics.html\nOBJECTIVE: What is the price of HP Inkjet Fax Machine?\nPREVIOUS ACTION: None", + "Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```" + ], + [ + "OBSERVATION:\n[204] heading '/f/food'\n[593] heading '[homemade] Obligatory Halloween Pumpkin Loaf!'\n\t[942] link '[homemade] Obligatory Halloween Pumpkin Loaf!'\n[945] StaticText 'Submitted by '\n[30] link 'kneechalice' expanded: False\n[1484] StaticText 't3_yid9lu'\n[949] time 'October 31, 2022 at 10:10:03 AM EDT'\n\t[1488] StaticText '1 year ago'\n[1489] link '45 comments'\n[605] heading '[I ate] Maple Pecan Croissant'\n\t[963] link '[I ate] Maple Pecan Croissant'\n[966] StaticText 'Submitted by '\n[37] link 'AccordingtoJP' expanded: False\n[1494] StaticText 't3_y3hrpn'\n[970] time 'October 13, 2022 at 10:41:09 PM EDT'\n\t[1498] StaticText '1 year ago'\n[1499] link '204 comments'\nURL: http://reddit.com\nOBJECTIVE: Tell me what the top comment on the croissant post says.\nPREVIOUS ACTION: None", + "Let's think step-by-step. This page has a post titled '[I ate] Maple Pecan Croissant', which is the post mentioned in the objective. In order to find the top comment, I will navigate into the comments section of the post. 
In summary, the next action I will perform is ```click [1499]```" + ], + [ + "OBSERVATION:\n[42] link 'My account'\n[43] link 'Logout'\n[44] link 'Publish Ad'\n[25] heading 'What are you looking for today?'\n[143] StaticText 'Keyword'\n[81] textbox 'e.g., a blue used car' required: False\n[146] StaticText 'Category'\n[28] heading 'Latest Listings'\n[86] link 'Atlas Powered Audio System w/ Tripod'\n\t[176] img 'Atlas Powered Audio System w/ Tripod'\n[511] StaticText '150.00 $'\n[88] link 'Neptune Gaming Console'\n\t[178] img 'Neptune Gaming Console'\n[515] StaticText '350.00 $'\nURL: http://classifieds.com\nOBJECTIVE: Help me find the cheapest dark colored guitar.\nPREVIOUS ACTION: None", + "Let's think step-by-step. The objective is to find the cheapest dark colored guitar on the site. The site has a search box whose ID is [81]. I can search for guitars by entering \"guitar\". I can submit this by pressing the Enter afterwards. In summary, the next action I will perform is ```type [81] [guitar] [1]```" + ] + ], + "template": "OBSERVATION:\n{observation}\nURL: {url}\nOBJECTIVE: {objective}\nPREVIOUS ACTION: {previous_action}", + "meta_data": { + "observation": "accessibility_tree", + "action_type": "id_accessibility_tree", + "keywords": [ + "url", + "objective", + "observation", + "previous_action" + ], + "prompt_constructor": "CoTPromptConstructor", + "answer_phrase": "In summary, the next action I will perform is", + "action_splitter": "```" + } +} \ No newline at end of file diff --git a/VAB-WebArena-Lite/agent/prompts/jsons/p_multimodal_cot_id_actree_3s.json b/VAB-WebArena-Lite/agent/prompts/jsons/p_multimodal_cot_id_actree_3s.json new file mode 100644 index 0000000..f65c66e --- /dev/null +++ b/VAB-WebArena-Lite/agent/prompts/jsons/p_multimodal_cot_id_actree_3s.json @@ -0,0 +1,34 @@ +{ + "intro": "You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. 
These tasks will be accomplished through the use of specific actions you can issue.\n\nHere's the information you'll have:\nThe user's objective: This is the task you're trying to complete.\nThe current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.\nThe current web page's URL: This is the page you're currently navigating.\nThe open tabs: These are the tabs you have open.\nThe previous action: This is the action you just performed. It may be helpful to track your progress.\n\nThe actions you can perform fall into several categories:\n\nPage Operation Actions:\n```click [id]```: This action clicks on an element with a specific id on the webpage.\n```type [id] [content]```: Use this to type the content into the field with id. By default, the \"Enter\" key is pressed after typing unless press_enter_after is set to 0, i.e., ```type [id] [content] [0]```.\n```hover [id]```: Hover over an element with id.\n```press [key_comb]```: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).\n```scroll [down]``` or ```scroll [up]```: Scroll the page up or down.\n\nTab Management Actions:\n```new_tab```: Open a new, empty browser tab.\n```tab_focus [tab_index]```: Switch the browser's focus to a specific tab using its index.\n```close_tab```: Close the currently active tab.\n\nURL Navigation Actions:\n```goto [url]```: Navigate to a specific URL.\n```go_back```: Navigate to the previously viewed page.\n```go_forward```: Navigate to the next page (if a previous 'go_back' action was performed).\n\nCompletion Action:\n```stop [answer]```: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.\n\nHomepage:\nIf you want to visit other websites, check out the homepage at http://homepage.com. 
It has a list of websites you can visit.\nhttp://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.\n\nTo be successful, it is very important to follow the following rules:\n1. You should only issue an action that is valid given the current observation\n2. You should only issue one action at a time.\n3. You should follow the examples to reason step by step and then issue the next action.\n4. Generate the action in the correct format. Start with a \"In summary, the next action I will perform is\" phrase, followed by action inside ``````. For example, \"In summary, the next action I will perform is ```click [1234]```\".\n5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.", + "examples": [ + [ + "OBSERVATION:\n[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'\n[1749] StaticText '$279.49'\n[1757] button 'Add to Cart'\n[1760] button 'Add to Wish List'\n[1761] button 'Add to Compare'\nURL: http://onestopmarket.com/office-products/office-electronics.html\nOBJECTIVE: What is the price of HP Inkjet Fax Machine?\nPREVIOUS ACTION: None", + "Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. 
In summary, the next action I will perform is ```stop [$279.49]```", + "agent/prompts/multimodal_examples/multimodal_example1.png" + ], + [ + "OBSERVATION:\n[204] heading '/f/food'\n[593] heading '[homemade] Obligatory Halloween Pumpkin Loaf!'\n\t[942] link '[homemade] Obligatory Halloween Pumpkin Loaf!'\n[945] StaticText 'Submitted by '\n[30] link 'kneechalice' expanded: False\n[1484] StaticText 't3_yid9lu'\n[949] time 'October 31, 2022 at 10:10:03 AM EDT'\n\t[1488] StaticText '1 year ago'\n[1489] link '45 comments'\n[605] heading '[I ate] Maple Pecan Croissant'\n\t[963] link '[I ate] Maple Pecan Croissant'\n[966] StaticText 'Submitted by '\n[37] link 'AccordingtoJP' expanded: False\n[1494] StaticText 't3_y3hrpn'\n[970] time 'October 13, 2022 at 10:41:09 PM EDT'\n\t[1498] StaticText '1 year ago'\n[1499] link '204 comments'\nURL: http://reddit.com\nOBJECTIVE: Tell me what the top comment on the croissant post says.\nPREVIOUS ACTION: None", + "Let's think step-by-step. This page has a post titled '[I ate] Maple Pecan Croissant', which is the post mentioned in the objective. In order to find the top comment, I will navigate into the comments section of the post. In summary, the next action I will perform is ```click [1499]```", + "agent/prompts/multimodal_examples/multimodal_example2.png" + ], + [ + "OBSERVATION:\n[42] link 'My account'\n[43] link 'Logout'\n[44] link 'Publish Ad'\n[25] heading 'What are you looking for today?'\n[143] StaticText 'Keyword'\n[81] textbox 'e.g., a blue used car' required: False\n[146] StaticText 'Category'\n[28] heading 'Latest Listings'\n[86] link 'Atlas Powered Audio System w/ Tripod'\n\t[176] img 'Atlas Powered Audio System w/ Tripod'\n[511] StaticText '150.00 $'\n[88] link 'Neptune Gaming Console'\n\t[178] img 'Neptune Gaming Console'\n[515] StaticText '350.00 $'\nURL: http://classifieds.com\nOBJECTIVE: Help me find the cheapest dark colored guitar.\nPREVIOUS ACTION: None", + "Let's think step-by-step. 
The objective is to find the cheapest dark colored guitar on the site. The site has a search box whose ID is [81]. I can search for guitars by entering \"guitar\". I can submit this by pressing the Enter afterwards. In summary, the next action I will perform is ```type [81] [guitar] [1]```", + "agent/prompts/multimodal_examples/multimodal_example3.png" + ] + ], + "template": "OBSERVATION:\n{observation}\nURL: {url}\nOBJECTIVE: {objective}\nPREVIOUS ACTION: {previous_action}", + "meta_data": { + "observation": "accessibility_tree", + "action_type": "id_accessibility_tree", + "keywords": [ + "url", + "objective", + "observation", + "previous_action" + ], + "prompt_constructor": "MultimodalCoTPromptConstructor", + "answer_phrase": "In summary, the next action I will perform is", + "action_splitter": "```" + } +} \ No newline at end of file diff --git a/VAB-WebArena-Lite/agent/prompts/jsons/p_som_cot_id_actree_3s.json b/VAB-WebArena-Lite/agent/prompts/jsons/p_som_cot_id_actree_3s.json new file mode 100644 index 0000000..0fe6f35 --- /dev/null +++ b/VAB-WebArena-Lite/agent/prompts/jsons/p_som_cot_id_actree_3s.json @@ -0,0 +1,34 @@ +{ + "intro": "You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.\n\nHere's the information you'll have:\nThe user's objective: This is the task you're trying to complete.\nThe current web page screenshot: This is a screenshot of the webpage, with each interactable element assigned a unique numerical id. Each bounding box and its respective id shares the same color.\nThe observation, which lists the IDs of all interactable elements on the current web page with their text content if any, in the format [id] [tagType] [text content]. tagType is the type of the element, such as button, link, or textbox. text content is the text content of the element. 
For example, [1234] [button] ['Add to Cart'] means that there is a button with id 1234 and text content 'Add to Cart' on the current web page. [] [StaticText] [text] means that the element is of some text that is not interactable.\nThe current web page's URL: This is the page you're currently navigating.\nThe open tabs: These are the tabs you have open.\nThe previous action: This is the action you just performed. It may be helpful to track your progress.\n\nThe actions you can perform fall into several categories:\n\nPage Operation Actions:\n```click [id]```: This action clicks on an element with a specific id on the webpage.\n```type [id] [content]```: Use this to type the content into the field with id. By default, the \"Enter\" key is pressed after typing unless press_enter_after is set to 0, i.e., ```type [id] [content] [0]```.\n```hover [id]```: Hover over an element with id.\n```press [key_comb]```: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).\n```scroll [down]``` or ```scroll [up]```: Scroll the page up or down.\n\nTab Management Actions:\n```new_tab```: Open a new, empty browser tab.\n```tab_focus [tab_index]```: Switch the browser's focus to a specific tab using its index.\n```close_tab```: Close the currently active tab.\n\nURL Navigation Actions:\n```goto [url]```: Navigate to a specific URL.\n```go_back```: Navigate to the previously viewed page.\n```go_forward```: Navigate to the next page (if a previous 'go_back' action was performed).\n\nCompletion Action:\n```stop [answer]```: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket.\n\nHomepage:\nIf you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.\nhttp://homepage.com/password.html lists all the account name and password for the websites. 
You can use them to log in to the websites.\n\nTo be successful, it is very important to follow the following rules:\n1. You should only issue an action that is valid given the current observation\n2. You should only issue one action at a time.\n3. You should follow the examples to reason step by step and then issue the next action.\n4. Generate the action in the correct format. Start with a \"In summary, the next action I will perform is\" phrase, followed by action inside ``````. For example, \"In summary, the next action I will perform is ```click [1234]```\".\n5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.", + "examples": [ + [ + "OBSERVATION:\n[31] [IMG] [Image, description: hp fx-7010dn fax machine, url: http://ec2-3-13-232-171.us-east-2.compute.amazonaws.com:7770/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B08GKZ3ZKD.0.jpg]\n[32] [A] [HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)]\n[] [StaticText] [$279.49]\n[33] [BUTTON] [Add to Cart]\n[34] [A] [Add to Wish List]\n[35] [A] [Add to Compare]\nURL: http://onestopmarket.com/office-products/office-electronics.html\nOBJECTIVE: What is the price of HP Inkjet Fax Machine?\nPREVIOUS ACTION: None", + "Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. 
In summary, the next action I will perform is ```stop [$279.49]```", + "agent/prompts/som_examples/som_example1.png" + ], + [ + "OBSERVATION:\n[] [StaticText] [/f/food]\n[] [StaticText] [[homemade] Obligatory Halloween Pumpkin Loaf!\tSubmitted by\tkneechalice\tt3_yid9lu\t1 year ago]\n[9] [IMG] []\n[] [StaticText] [Submitted by\tkneechalice\tt3_yid9lu\t1 year ago]\n[10] [A] [kneechalice]\n[11] [A] [45 comments]\n[] [StaticText] [[I ate] Maple Pecan Croissant\tSubmitted by\tAccordingtoJP\tt3_y3hrpn\t1 year ago]\n[14] [IMG] []\n[] [StaticText] [Submitted by\tAccordingtoJP\tt3_y3hrpn\t1 year ago]\n[15] [A] [AccordingtoJP]\n[16] [A] [204 comments]\nURL: http://reddit.com\nOBJECTIVE: Tell me what the top comment on the croissant post says.\nPREVIOUS ACTION: None", + "Let's think step-by-step. This page has a post titled '[I ate] Maple Pecan Croissant', which is the post mentioned in the objective. In order to find the top comment, I will navigate into the comments section of the post. In summary, the next action I will perform is ```click [16]```", + "agent/prompts/som_examples/som_example2.png" + ], + [ + "OBSERVATION:\n[] [StaticText] [What are you looking for today?]\n[5] [INPUT] []\n[6] [SELECT] [Select a category]\n[7] [BUTTON] [Search]\n[] [StaticText] [Latest Listings]\n[] [StaticText] [Atlas Powered Audio System w/ Tripod\t150.00 $\tMusic instruments\tBorough of Red Lion (Pennsylvania)\t2023/11/16]\n[8] [IMG] [Atlas Powered Audio System w/ Tripod]\n[9] [A] [Atlas Powered Audio System w/ Tripod]\n[] [StaticText] [150.00 $]\n[] [StaticText] [Neptune Gaming Console\t350.00 $\tVideo gaming\tPennwyn (Pennsylvania)\t2023/11/16]\n[10] [IMG] [Neptune Gaming Console]\n[11] [A] [Neptune Gaming Console]\n[] [StaticText] [350.00 $]\nURL: http://classifieds.com\nOBJECTIVE: Help me find the cheapest dark colored guitar.\nPREVIOUS ACTION: None", + "Let's think step-by-step. The objective is to find the cheapest dark colored guitar on the site. 
The site has a search box whose ID is [5]. I can search for guitars by entering \"guitar\". I can submit this by pressing the Enter afterwards. In summary, the next action I will perform is ```type [5] [guitar] [1]```", + "agent/prompts/som_examples/som_example3.png" + ] + ], + "template": "OBSERVATION: {observation}\nURL: {url}\nOBJECTIVE: {objective}\nPREVIOUS ACTION: {previous_action}", + "meta_data": { + "observation": "image_som", + "action_type": "som", + "keywords": [ + "url", + "objective", + "observation", + "previous_action" + ], + "prompt_constructor": "MultimodalCoTPromptConstructor", + "answer_phrase": "In summary, the next action I will perform is", + "action_splitter": "```" + } +} \ No newline at end of file diff --git a/VAB-WebArena-Lite/new/p_webrl.json b/VAB-WebArena-Lite/agent/prompts/jsons/p_webrl.json similarity index 100% rename from VAB-WebArena-Lite/new/p_webrl.json rename to VAB-WebArena-Lite/agent/prompts/jsons/p_webrl.json diff --git a/VAB-WebArena-Lite/new/p_webrl_chat.json b/VAB-WebArena-Lite/agent/prompts/jsons/p_webrl_chat.json similarity index 100% rename from VAB-WebArena-Lite/new/p_webrl_chat.json rename to VAB-WebArena-Lite/agent/prompts/jsons/p_webrl_chat.json diff --git a/VAB-WebArena-Lite/agent/prompts/multimodal_examples/multimodal_example1.png b/VAB-WebArena-Lite/agent/prompts/multimodal_examples/multimodal_example1.png new file mode 100644 index 0000000..c41834b Binary files /dev/null and b/VAB-WebArena-Lite/agent/prompts/multimodal_examples/multimodal_example1.png differ diff --git a/VAB-WebArena-Lite/agent/prompts/multimodal_examples/multimodal_example2.png b/VAB-WebArena-Lite/agent/prompts/multimodal_examples/multimodal_example2.png new file mode 100644 index 0000000..e59c7ed Binary files /dev/null and b/VAB-WebArena-Lite/agent/prompts/multimodal_examples/multimodal_example2.png differ diff --git a/VAB-WebArena-Lite/agent/prompts/multimodal_examples/multimodal_example3.png 
b/VAB-WebArena-Lite/agent/prompts/multimodal_examples/multimodal_example3.png new file mode 100644 index 0000000..c32e7bb Binary files /dev/null and b/VAB-WebArena-Lite/agent/prompts/multimodal_examples/multimodal_example3.png differ diff --git a/VAB-WebArena-Lite/new/prompt_constructor.py b/VAB-WebArena-Lite/agent/prompts/prompt_constructor.py similarity index 100% rename from VAB-WebArena-Lite/new/prompt_constructor.py rename to VAB-WebArena-Lite/agent/prompts/prompt_constructor.py diff --git a/VAB-WebArena-Lite/agent/prompts/raw/p_cot_id_actree_2s.py b/VAB-WebArena-Lite/agent/prompts/raw/p_cot_id_actree_2s.py new file mode 100644 index 0000000..b85e54c --- /dev/null +++ b/VAB-WebArena-Lite/agent/prompts/raw/p_cot_id_actree_2s.py @@ -0,0 +1,82 @@ +prompt = { + "intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue. + +Here's the information you'll have: +The user's objective: This is the task you're trying to complete. +The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information. +The current web page's URL: This is the page you're currently navigating. +The open tabs: These are the tabs you have open. +The previous action: This is the action you just performed. It may be helpful to track your progress. + +The actions you can perform fall into several categories: + +Page Operation Actions: +`click [id]`: This action clicks on an element with a specific id on the webpage. +`type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0. +`hover [id]`: Hover over an element with id. +`press [key_comb]`: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v). 
+`scroll [direction=down|up]`: Scroll the page up or down. + +Tab Management Actions: +`new_tab`: Open a new, empty browser tab. +`tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index. +`close_tab`: Close the currently active tab. + +URL Navigation Actions: +`goto [url]`: Navigate to a specific URL. +`go_back`: Navigate to the previously viewed page. +`go_forward`: Navigate to the next page (if a previous 'go_back' action was performed). + +Completion Action: +`stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket. If you believe the task is impossible to complete, provide the answer as "N/A" in the bracket. + +Homepage: +If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit. +http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites. + +To be successful, it is very important to follow the following rules: +1. You should only issue an action that is valid given the current observation +2. You should only issue one action at a time. +3. You should follow the examples to reason step by step and then issue the next action. +4. Generate the action in the correct format. Start with a "In summary, the next action I will perform is" phrase, followed by action inside ``````. For example, "In summary, the next action I will perform is ```click [1234]```". +5. Issue stop action when you think you have achieved the objective. 
Don't generate anything after stop.""", + "examples": [ + ( + """OBSERVATION: +[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)' + [1749] StaticText '$279.49' + [1757] button 'Add to Cart' + [1760] button 'Add to Wish List' + [1761] button 'Add to Compare' +URL: http://onestopmarket.com/office-products/office-electronics.html +OBJECTIVE: What is the price of HP Inkjet Fax Machine +PREVIOUS ACTION: None""", + "Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```", + ), + ( + """OBSERVATION: +[164] textbox 'Search' focused: True required: False +[171] button 'Go' +[174] link 'Find directions between two points' +[212] heading 'Search Results' +[216] button 'Close' +URL: http://openstreetmap.org +OBJECTIVE: Show me the restaurants near CMU +PREVIOUS ACTION: None""", + "Let's think step-by-step. This page has a search box whose ID is [164]. According to the nominatim rule of openstreetmap, I can search for the restaurants near a location by \"restaurants near\". I can submit my typing by pressing the Enter afterwards. 
In summary, the next action I will perform is ```type [164] [restaurants near CMU] [1]```", + ), + ], + "template": """OBSERVATION: +{observation} +URL: {url} +OBJECTIVE: {objective} +PREVIOUS ACTION: {previous_action}""", + "meta_data": { + "observation": "accessibility_tree", + "action_type": "id_accessibility_tree", + "keywords": ["url", "objective", "observation", "previous_action"], + "prompt_constructor": "CoTPromptConstructor", + "answer_phrase": "In summary, the next action I will perform is", + "action_splitter": "```" + }, +} diff --git a/VAB-WebArena-Lite/agent/prompts/raw/p_cot_id_actree_2s_no_na.py b/VAB-WebArena-Lite/agent/prompts/raw/p_cot_id_actree_2s_no_na.py new file mode 100644 index 0000000..945cd95 --- /dev/null +++ b/VAB-WebArena-Lite/agent/prompts/raw/p_cot_id_actree_2s_no_na.py @@ -0,0 +1,82 @@ +prompt = { + "intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue. + +Here's the information you'll have: +The user's objective: This is the task you're trying to complete. +The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information. +The current web page's URL: This is the page you're currently navigating. +The open tabs: These are the tabs you have open. +The previous action: This is the action you just performed. It may be helpful to track your progress. + +The actions you can perform fall into several categories: + +Page Operation Actions: +`click [id]`: This action clicks on an element with a specific id on the webpage. +`type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0. +`hover [id]`: Hover over an element with id. 
+`press [key_comb]`: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v). +`scroll [direction=down|up]`: Scroll the page up or down. + +Tab Management Actions: +`new_tab`: Open a new, empty browser tab. +`tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index. +`close_tab`: Close the currently active tab. + +URL Navigation Actions: +`goto [url]`: Navigate to a specific URL. +`go_back`: Navigate to the previously viewed page. +`go_forward`: Navigate to the next page (if a previous 'go_back' action was performed). + +Completion Action: +`stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket. + +Homepage: +If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit. +http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites. + +To be successful, it is very important to follow the following rules: +1. You should only issue an action that is valid given the current observation +2. You should only issue one action at a time. +3. You should follow the examples to reason step by step and then issue the next action. +4. Generate the action in the correct format. Start with a "In summary, the next action I will perform is" phrase, followed by action inside ``````. For example, "In summary, the next action I will perform is ```click [1234]```". +5. Issue stop action when you think you have achieved the objective. 
Don't generate anything after stop.""", + "examples": [ + ( + """OBSERVATION: +[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)' + [1749] StaticText '$279.49' + [1757] button 'Add to Cart' + [1760] button 'Add to Wish List' + [1761] button 'Add to Compare' +URL: http://onestopmarket.com/office-products/office-electronics.html +OBJECTIVE: What is the price of HP Inkjet Fax Machine +PREVIOUS ACTION: None""", + "Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```", + ), + ( + """OBSERVATION: +[164] textbox 'Search' focused: True required: False +[171] button 'Go' +[174] link 'Find directions between two points' +[212] heading 'Search Results' +[216] button 'Close' +URL: http://openstreetmap.org +OBJECTIVE: Show me the restaurants near CMU +PREVIOUS ACTION: None""", + "Let's think step-by-step. This page has a search box whose ID is [164]. According to the nominatim rule of openstreetmap, I can search for the restaurants near a location by \"restaurants near\". I can submit my typing by pressing the Enter afterwards. 
In summary, the next action I will perform is ```type [164] [restaurants near CMU] [1]```", + ), + ], + "template": """OBSERVATION: +{observation} +URL: {url} +OBJECTIVE: {objective} +PREVIOUS ACTION: {previous_action}""", + "meta_data": { + "observation": "accessibility_tree", + "action_type": "id_accessibility_tree", + "keywords": ["url", "objective", "observation", "previous_action"], + "prompt_constructor": "CoTPromptConstructor", + "answer_phrase": "In summary, the next action I will perform is", + "action_splitter": "```" + }, +} diff --git a/VAB-WebArena-Lite/agent/prompts/raw/p_cot_id_actree_3s.py b/VAB-WebArena-Lite/agent/prompts/raw/p_cot_id_actree_3s.py new file mode 100644 index 0000000..d3479a1 --- /dev/null +++ b/VAB-WebArena-Lite/agent/prompts/raw/p_cot_id_actree_3s.py @@ -0,0 +1,115 @@ +prompt = { + "intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue. + +Here's the information you'll have: +The user's objective: This is the task you're trying to complete. +The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information. +The current web page's URL: This is the page you're currently navigating. +The open tabs: These are the tabs you have open. +The previous action: This is the action you just performed. It may be helpful to track your progress. + +The actions you can perform fall into several categories: + +Page Operation Actions: +```click [id]```: This action clicks on an element with a specific id on the webpage. +```type [id] [content]```: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0, i.e., ```type [id] [content] [0]```. +```hover [id]```: Hover over an element with id. 
+```press [key_comb]```: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v). +```scroll [down]``` or ```scroll [up]```: Scroll the page up or down. + +Tab Management Actions: +```new_tab```: Open a new, empty browser tab. +```tab_focus [tab_index]```: Switch the browser's focus to a specific tab using its index. +```close_tab```: Close the currently active tab. + +URL Navigation Actions: +```goto [url]```: Navigate to a specific URL. +```go_back```: Navigate to the previously viewed page. +```go_forward```: Navigate to the next page (if a previous 'go_back' action was performed). + +Completion Action: +```stop [answer]```: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket. + +Homepage: +If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit. +http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites. + +To be successful, it is very important to follow the following rules: +1. You should only issue an action that is valid given the current observation +2. You should only issue one action at a time. +3. You should follow the examples to reason step by step and then issue the next action. +4. Generate the action in the correct format. Start with a "In summary, the next action I will perform is" phrase, followed by action inside ``````. For example, "In summary, the next action I will perform is ```click [1234]```". +5. Issue stop action when you think you have achieved the objective. 
Don't generate anything after stop.""", + "examples": [ + ( + """OBSERVATION: +[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)' +[1749] StaticText '$279.49' +[1757] button 'Add to Cart' +[1760] button 'Add to Wish List' +[1761] button 'Add to Compare' +URL: http://onestopmarket.com/office-products/office-electronics.html +OBJECTIVE: What is the price of HP Inkjet Fax Machine? +PREVIOUS ACTION: None""", + "Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```", + ), + ( + """OBSERVATION: +[204] heading '/f/food' +[593] heading '[homemade] Obligatory Halloween Pumpkin Loaf!' + [942] link '[homemade] Obligatory Halloween Pumpkin Loaf!' +[945] StaticText 'Submitted by ' +[30] link 'kneechalice' expanded: False +[1484] StaticText 't3_yid9lu' +[949] time 'October 31, 2022 at 10:10:03 AM EDT' + [1488] StaticText '1 year ago' +[1489] link '45 comments' +[605] heading '[I ate] Maple Pecan Croissant' + [963] link '[I ate] Maple Pecan Croissant' +[966] StaticText 'Submitted by ' +[37] link 'AccordingtoJP' expanded: False +[1494] StaticText 't3_y3hrpn' +[970] time 'October 13, 2022 at 10:41:09 PM EDT' + [1498] StaticText '1 year ago' +[1499] link '204 comments' +URL: http://reddit.com +OBJECTIVE: Tell me what the top comment on the croissant post says. +PREVIOUS ACTION: None""", + "Let's think step-by-step. This page has a post titled '[I ate] Maple Pecan Croissant', which is the post mentioned in the objective. In order to find the top comment, I will navigate into the comments section of the post. In summary, the next action I will perform is ```click [1499]```", + ), + ( + """OBSERVATION: +[42] link 'My account' +[43] link 'Logout' +[44] link 'Publish Ad' +[25] heading 'What are you looking for today?' 
+[143] StaticText 'Keyword' +[81] textbox 'e.g., a blue used car' required: False +[146] StaticText 'Category' +[28] heading 'Latest Listings' +[86] link 'Atlas Powered Audio System w/ Tripod' + [176] img 'Atlas Powered Audio System w/ Tripod' +[511] StaticText '150.00 $' +[88] link 'Neptune Gaming Console' + [178] img 'Neptune Gaming Console' +[515] StaticText '350.00 $' +URL: http://classifieds.com +OBJECTIVE: Help me find the cheapest dark colored guitar. +PREVIOUS ACTION: None""", + "Let's think step-by-step. The objective is to find the cheapest dark colored guitar on the site. The site has a search box whose ID is [81]. I can search for guitars by entering \"guitar\". I can submit this by pressing the Enter afterwards. In summary, the next action I will perform is ```type [81] [guitar] [1]```", + ), + ], + "template": """OBSERVATION: +{observation} +URL: {url} +OBJECTIVE: {objective} +PREVIOUS ACTION: {previous_action}""", + "meta_data": { + "observation": "accessibility_tree", + "action_type": "id_accessibility_tree", + "keywords": ["url", "objective", "observation", "previous_action"], + "prompt_constructor": "CoTPromptConstructor", + "answer_phrase": "In summary, the next action I will perform is", + "action_splitter": "```" + }, +} diff --git a/VAB-WebArena-Lite/agent/prompts/raw/p_multimodal_cot_id_actree_3s.py b/VAB-WebArena-Lite/agent/prompts/raw/p_multimodal_cot_id_actree_3s.py new file mode 100644 index 0000000..535960b --- /dev/null +++ b/VAB-WebArena-Lite/agent/prompts/raw/p_multimodal_cot_id_actree_3s.py @@ -0,0 +1,118 @@ +prompt = { + "intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue. + +Here's the information you'll have: +The user's objective: This is the task you're trying to complete. 
+The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information. +The current web page's URL: This is the page you're currently navigating. +The open tabs: These are the tabs you have open. +The previous action: This is the action you just performed. It may be helpful to track your progress. + +The actions you can perform fall into several categories: + +Page Operation Actions: +```click [id]```: This action clicks on an element with a specific id on the webpage. +```type [id] [content]```: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0, i.e., ```type [id] [content] [0]```. +```hover [id]```: Hover over an element with id. +```press [key_comb]```: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v). +```scroll [down]``` or ```scroll [up]```: Scroll the page up or down. + +Tab Management Actions: +```new_tab```: Open a new, empty browser tab. +```tab_focus [tab_index]```: Switch the browser's focus to a specific tab using its index. +```close_tab```: Close the currently active tab. + +URL Navigation Actions: +```goto [url]```: Navigate to a specific URL. +```go_back```: Navigate to the previously viewed page. +```go_forward```: Navigate to the next page (if a previous 'go_back' action was performed). + +Completion Action: +```stop [answer]```: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket. + +Homepage: +If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit. +http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites. + +To be successful, it is very important to follow the following rules: +1. 
You should only issue an action that is valid given the current observation +2. You should only issue one action at a time. +3. You should follow the examples to reason step by step and then issue the next action. +4. Generate the action in the correct format. Start with a "In summary, the next action I will perform is" phrase, followed by action inside ``````. For example, "In summary, the next action I will perform is ```click [1234]```". +5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.""", + "examples": [ + ( + """OBSERVATION: +[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)' +[1749] StaticText '$279.49' +[1757] button 'Add to Cart' +[1760] button 'Add to Wish List' +[1761] button 'Add to Compare' +URL: http://onestopmarket.com/office-products/office-electronics.html +OBJECTIVE: What is the price of HP Inkjet Fax Machine? +PREVIOUS ACTION: None""", + "Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```", + "agent/prompts/multimodal_examples/multimodal_example1.png" + ), + ( + """OBSERVATION: +[204] heading '/f/food' +[593] heading '[homemade] Obligatory Halloween Pumpkin Loaf!' + [942] link '[homemade] Obligatory Halloween Pumpkin Loaf!' 
+[945] StaticText 'Submitted by ' +[30] link 'kneechalice' expanded: False +[1484] StaticText 't3_yid9lu' +[949] time 'October 31, 2022 at 10:10:03 AM EDT' + [1488] StaticText '1 year ago' +[1489] link '45 comments' +[605] heading '[I ate] Maple Pecan Croissant' + [963] link '[I ate] Maple Pecan Croissant' +[966] StaticText 'Submitted by ' +[37] link 'AccordingtoJP' expanded: False +[1494] StaticText 't3_y3hrpn' +[970] time 'October 13, 2022 at 10:41:09 PM EDT' + [1498] StaticText '1 year ago' +[1499] link '204 comments' +URL: http://reddit.com +OBJECTIVE: Tell me what the top comment on the croissant post says. +PREVIOUS ACTION: None""", + "Let's think step-by-step. This page has a post titled '[I ate] Maple Pecan Croissant', which is the post mentioned in the objective. In order to find the top comment, I will navigate into the comments section of the post. In summary, the next action I will perform is ```click [1499]```", + "agent/prompts/multimodal_examples/multimodal_example2.png" + ), + ( + """OBSERVATION: +[42] link 'My account' +[43] link 'Logout' +[44] link 'Publish Ad' +[25] heading 'What are you looking for today?' +[143] StaticText 'Keyword' +[81] textbox 'e.g., a blue used car' required: False +[146] StaticText 'Category' +[28] heading 'Latest Listings' +[86] link 'Atlas Powered Audio System w/ Tripod' + [176] img 'Atlas Powered Audio System w/ Tripod' +[511] StaticText '150.00 $' +[88] link 'Neptune Gaming Console' + [178] img 'Neptune Gaming Console' +[515] StaticText '350.00 $' +URL: http://classifieds.com +OBJECTIVE: Help me find the cheapest dark colored guitar. +PREVIOUS ACTION: None""", + "Let's think step-by-step. The objective is to find the cheapest dark colored guitar on the site. The site has a search box whose ID is [81]. I can search for guitars by entering \"guitar\". I can submit this by pressing the Enter afterwards. 
In summary, the next action I will perform is ```type [81] [guitar] [1]```", + "agent/prompts/multimodal_examples/multimodal_example3.png" + ), + ], + "template": """OBSERVATION: +{observation} +URL: {url} +OBJECTIVE: {objective} +PREVIOUS ACTION: {previous_action}""", + "meta_data": { + "observation": "accessibility_tree", + "action_type": "id_accessibility_tree", + "keywords": ["url", "objective", "observation", "previous_action"], + "prompt_constructor": "MultimodalCoTPromptConstructor", + "answer_phrase": "In summary, the next action I will perform is", + "action_splitter": "```" + }, +} diff --git a/VAB-WebArena-Lite/agent/prompts/raw/p_som_cot_id_actree_3s.py b/VAB-WebArena-Lite/agent/prompts/raw/p_som_cot_id_actree_3s.py new file mode 100644 index 0000000..7d822b9 --- /dev/null +++ b/VAB-WebArena-Lite/agent/prompts/raw/p_som_cot_id_actree_3s.py @@ -0,0 +1,112 @@ +prompt = { + "intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue. + +Here's the information you'll have: +The user's objective: This is the task you're trying to complete. +The current web page screenshot: This is a screenshot of the webpage, with each interactable element assigned a unique numerical id. Each bounding box and its respective id shares the same color. +The observation, which lists the IDs of all interactable elements on the current web page with their text content if any, in the format [id] [tagType] [text content]. tagType is the type of the element, such as button, link, or textbox. text content is the text content of the element. For example, [1234] [button] ['Add to Cart'] means that there is a button with id 1234 and text content 'Add to Cart' on the current web page. [] [StaticText] [text] means that the element is of some text that is not interactable. 
+The current web page's URL: This is the page you're currently navigating. +The open tabs: These are the tabs you have open. +The previous action: This is the action you just performed. It may be helpful to track your progress. + +The actions you can perform fall into several categories: + +Page Operation Actions: +```click [id]```: This action clicks on an element with a specific id on the webpage. +```type [id] [content]```: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0, i.e., ```type [id] [content] [0]```. +```hover [id]```: Hover over an element with id. +```press [key_comb]```: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v). +```scroll [down]``` or ```scroll [up]```: Scroll the page up or down. + +Tab Management Actions: +```new_tab```: Open a new, empty browser tab. +```tab_focus [tab_index]```: Switch the browser's focus to a specific tab using its index. +```close_tab```: Close the currently active tab. + +URL Navigation Actions: +```goto [url]```: Navigate to a specific URL. +```go_back```: Navigate to the previously viewed page. +```go_forward```: Navigate to the next page (if a previous 'go_back' action was performed). + +Completion Action: +```stop [answer]```: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket. + +Homepage: +If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit. +http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites. + +To be successful, it is very important to follow the following rules: +1. You should only issue an action that is valid given the current observation +2. You should only issue one action at a time. +3. 
You should follow the examples to reason step by step and then issue the next action. +4. Generate the action in the correct format. Start with a "In summary, the next action I will perform is" phrase, followed by action inside ``````. For example, "In summary, the next action I will perform is ```click [1234]```". +5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.""", + "examples": [ + ( + """OBSERVATION: +[31] [IMG] [Image, description: hp fx-7010dn fax machine, url: http://ec2-3-13-232-171.us-east-2.compute.amazonaws.com:7770/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B08GKZ3ZKD.0.jpg] +[32] [A] [HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)] +[] [StaticText] [$279.49] +[33] [BUTTON] [Add to Cart] +[34] [A] [Add to Wish List] +[35] [A] [Add to Compare] +URL: http://onestopmarket.com/office-products/office-electronics.html +OBJECTIVE: What is the price of HP Inkjet Fax Machine? +PREVIOUS ACTION: None""", + "Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```", + "agent/prompts/som_examples/som_example1.png" + ), + ( + """OBSERVATION: +[] [StaticText] [/f/food] +[] [StaticText] [[homemade] Obligatory Halloween Pumpkin Loaf! Submitted by kneechalice t3_yid9lu 1 year ago] +[9] [IMG] [] +[] [StaticText] [Submitted by kneechalice t3_yid9lu 1 year ago] +[10] [A] [kneechalice] +[11] [A] [45 comments] +[] [StaticText] [[I ate] Maple Pecan Croissant Submitted by AccordingtoJP t3_y3hrpn 1 year ago] +[14] [IMG] [] +[] [StaticText] [Submitted by AccordingtoJP t3_y3hrpn 1 year ago] +[15] [A] [AccordingtoJP] +[16] [A] [204 comments] +URL: http://reddit.com +OBJECTIVE: Tell me what the top comment on the croissant post says. 
+PREVIOUS ACTION: None""", + "Let's think step-by-step. This page has a post titled '[I ate] Maple Pecan Croissant', which is the post mentioned in the objective. In order to find the top comment, I will navigate into the comments section of the post. In summary, the next action I will perform is ```click [11]```", + "agent/prompts/som_examples/som_example2.png" + ), + ( + """OBSERVATION: +[] [StaticText] [What are you looking for today?] +[5] [INPUT] [] +[6] [SELECT] [Select a category] +[7] [BUTTON] [Search] +[] [StaticText] [Latest Listings] +[] [StaticText] [Atlas Powered Audio System w/ Tripod 150.00 $ Music instruments Borough of Red Lion (Pennsylvania) 2023/11/16] +[8] [IMG] [Atlas Powered Audio System w/ Tripod] +[9] [A] [Atlas Powered Audio System w/ Tripod] +[] [StaticText] [150.00 $] +[] [StaticText] [Neptune Gaming Console 350.00 $ Video gaming Pennwyn (Pennsylvania) 2023/11/16] +[10] [IMG] [Neptune Gaming Console] +[11] [A] [Neptune Gaming Console] +[] [StaticText] [350.00 $] +URL: http://classifieds.com +OBJECTIVE: Help me find the cheapest dark colored guitar. +PREVIOUS ACTION: None""", + "Let's think step-by-step. The objective is to find the cheapest dark colored guitar on the site. The site has a search box whose ID is [5]. I can search for guitars by entering \"guitar\". I can submit this by pressing the Enter afterwards. 
# use the current directory as the root
def run() -> None:
    """Convert every prompt module in agent/prompts/raw to a JSON file.

    Each ``agent/prompts/raw/<name>.py`` is imported as
    ``agent.prompts.raw.<name>`` and its module-level ``prompt`` object is
    dumped to ``agent/prompts/jsons/<name>.json``. All paths are relative,
    so the function must be run from the directory that contains ``agent/``.

    Python files are easier to edit than JSON, which is why the prompts are
    authored as modules and converted here.

    Raises:
        ImportError: if a prompt module cannot be imported.
        AttributeError: if a module does not define ``prompt``.
    """
    out_dir = "agent/prompts/jsons"
    # Create the output directory once rather than on every iteration.
    os.makedirs(out_dir, exist_ok=True)

    # sorted() keeps the conversion order deterministic across platforms.
    for p_file in sorted(glob.glob("agent/prompts/raw/*.py")):
        # splitext removes only the trailing extension, unlike
        # str.replace(".py", "") which strips every occurrence.
        base_name = os.path.splitext(os.path.basename(p_file))[0]
        if base_name == "__init__":
            # A package marker is not a prompt definition.
            continue
        # import the file as a module
        module = importlib.import_module(f"agent.prompts.raw.{base_name}")
        # save the prompt as a json file
        with open(f"{out_dir}/{base_name}.json", "w") as f:
            json.dump(module.prompt, f, indent=2)
    print("Done convert python files to json")
"create_page_focus_action", + "create_new_tab_action", + "create_go_back_action", + "create_go_forward_action", + "create_goto_url_action", + "create_page_close_action", + "action2create_function", + "create_playwright_action", + "create_id_based_action", + "create_scroll_action", + "create_key_press_action", + "create_check_action", + "create_click_action", + "create_type_action", + "create_hover_action", + "create_select_option_action", + "create_stop_action", + "ActionParsingError", + "Trajectory", +] diff --git a/VAB-WebArena-Lite/new/actions.py b/VAB-WebArena-Lite/browser_env/actions.py similarity index 100% rename from VAB-WebArena-Lite/new/actions.py rename to VAB-WebArena-Lite/browser_env/actions.py diff --git a/VAB-WebArena-Lite/browser_env/async_envs.py b/VAB-WebArena-Lite/browser_env/async_envs.py new file mode 100644 index 0000000..312d770 --- /dev/null +++ b/VAB-WebArena-Lite/browser_env/async_envs.py @@ -0,0 +1,160 @@ +import asyncio +import json +from dataclasses import dataclass +from pathlib import Path + +import numpy as np +import numpy.typing as npt +from beartype import beartype +from gymnasium import Env +from gymnasium.spaces import Box, Text +from playwright.async_api import Page, ViewportSize, async_playwright + +from .actions import Action, aexecute_action, get_action_space +from .utils import DetachedPage, png_bytes_to_numpy + + +class AsyncScriptBrowserEnv(Env[npt.NDArray[np.uint8], Action]): + """ + The goal of this environment is to produce a prototype of a browser environment. + In the end, we want to support a fully configurable browser environment with wide + range of action spaces and observation spaces, both structured and unstructured. + But in this prototype, we just support action space specified by Playwright script, + and observation space is the html content of the page. 
+ """ + + @beartype + def __init__( + self, + max_page_length: int = 2048, + headless: bool = True, + slow_mo: int = 0, + timeout: int = 30000, + viewport_size: ViewportSize = {"width": 1280, "height": 720}, + ): + self.observation_space = Box( + 0, + 255, + (viewport_size["height"], viewport_size["width"], 4), + np.uint8, + ) + # TODO: make Space[Action] = ActionSpace + self.action_space = get_action_space() # type: ignore[assignment] + self.headless = headless + self.slow_mo = slow_mo + self.reset_finished = False + self.timeout = timeout + self.viewport_size = viewport_size + + @beartype + async def setup(self, config_file: Path | None = None) -> None: + self.context_manager = async_playwright() + self.playwright = await self.context_manager.__aenter__() + self.browser = await self.playwright.chromium.launch( + headless=self.headless, slow_mo=self.slow_mo + ) + if config_file: + with open(config_file, "r") as f: + instance_config = json.load(f) + else: + instance_config = {} + + storage_state = instance_config.get("storage_state", None) + start_url = instance_config.get("start_url", None) + geolocation = instance_config.get("geolocation", None) + + self.context = await self.browser.new_context( + viewport=self.viewport_size, + storage_state=storage_state, + geolocation=geolocation, + device_scale_factor=1, + ) + self.page = await self.context.new_page() + if start_url: + await self.page.goto(start_url) + + @beartype + async def areset( + self, + *, + seed: int | None = None, + options: dict[str, str] | None = None, + ) -> tuple[npt.NDArray[np.uint8], dict[str, object]]: + """ + Reset the environment. + :param options: options for the environment. 
The options are: + - storage_state: the path to the storage state file + """ + super().reset(seed=seed, options=options) + if self.reset_finished: + await self.context_manager.__aexit__() + if options is not None and "config_file" in options: + config_file = Path(options["config_file"]) + if config_file.exists(): + await self.setup(config_file=config_file) + else: + raise ValueError(f"Config state {config_file} does not exist.") + else: + await self.setup() + self.reset_finished = True + content = await self.page.content() + screenshot = png_bytes_to_numpy(await self.page.screenshot()) + return ( + screenshot, + {"page": DetachedPage(self.page.url, content)}, + ) + + @beartype + def reset( + self, + *, + seed: int | None = None, + options: dict[str, str] | None = None, + ) -> tuple[npt.NDArray[np.uint8], dict[str, object]]: + return asyncio.run(self.areset(seed=seed, options=options)) + + async def aclose(self) -> None: + if self.reset_finished: + await self.context_manager.__aexit__() + + def close(self) -> None: + asyncio.run(self.aclose()) + + @beartype + async def astep( + self, action: Action + ) -> tuple[npt.NDArray[np.uint8], float, bool, bool, dict[str, object]]: + if not self.reset_finished: + raise RuntimeError("Call reset first before calling step.") + success = False + fail_error = "" + try: + self.page = await aexecute_action(action, self.page, self.context) + success = True + except Exception as e: + fail_error = str(e) + + try: + content = await self.page.content() + screenshot = png_bytes_to_numpy(await self.page.screenshot()) + except: + await self.page.wait_for_load_state("load") + content = await self.page.content() + screenshot = png_bytes_to_numpy(await self.page.screenshot()) + + return ( + screenshot, + float(success), + False, + False, + { + "page": DetachedPage(self.page.url, content), + "fail_error": fail_error, + }, + ) + + @beartype + def step( + self, action: Action + ) -> tuple[npt.NDArray[np.uint8], float, bool, bool, dict[str, 
object]]: + return asyncio.run(self.astep(action), debug=True) diff --git a/VAB-WebArena-Lite/browser_env/auto_login.py b/VAB-WebArena-Lite/browser_env/auto_login.py new file mode 100644 index 0000000..67a22c9 --- /dev/null +++ b/VAB-WebArena-Lite/browser_env/auto_login.py @@ -0,0 +1,182 @@ +"""Script to automatically login each website""" +import argparse +import glob +import os +import time +from concurrent.futures import ThreadPoolExecutor +from itertools import combinations +from pathlib import Path + +from playwright.sync_api import sync_playwright +from browser_env.env_config import ACCOUNTS + +DATASET = os.environ["DATASET"] +if DATASET == "webarena": + from browser_env.env_config import ( + GITLAB, + REDDIT, + SHOPPING, + SHOPPING_ADMIN, + ) + SITES = ["gitlab", "shopping", "shopping_admin", "reddit"] + URLS = [ + f"{GITLAB}/-/profile", + f"{SHOPPING}/wishlist/", + f"{SHOPPING_ADMIN}/dashboard", + f"{REDDIT}/user/{ACCOUNTS['reddit']['username']}/account", + ] + EXACT_MATCH = [True, True, True, True] + KEYWORDS = ["", "", "Dashboard", "Delete"] + +elif DATASET == "visualwebarena": + from browser_env.env_config import ( + CLASSIFIEDS, + REDDIT, + SHOPPING, + ) + SITES = ["shopping", "reddit", "classifieds"] + URLS = [ + f"{SHOPPING}/wishlist/", + f"{REDDIT}/user/{ACCOUNTS['reddit']['username']}/account", + f"{CLASSIFIEDS}/index.php?page=user&action=items", + ] + EXACT_MATCH = [True, True, True] + KEYWORDS = ["", "Delete", "My listings"] +else: + raise ValueError(f"Dataset not implemented: {DATASET}") + +HEADLESS = True +SLOW_MO = 0 + +assert len(SITES) == len(URLS) == len(EXACT_MATCH) == len(KEYWORDS) + +def is_expired( + storage_state: Path, url: str, keyword: str, url_exact: bool = True +) -> bool: + """Test whether the cookie is expired""" + if not storage_state.exists(): + return True + + context_manager = sync_playwright() + playwright = context_manager.__enter__() + browser = playwright.chromium.launch(headless=True, slow_mo=SLOW_MO) + context = 
def is_expired(
    storage_state: Path, url: str, keyword: str, url_exact: bool = True
) -> bool:
    """Test whether the cookie is expired.

    Opens ``url`` in a browser context restored from ``storage_state`` and
    decides from the resulting page whether the session is still valid.

    Args:
        storage_state: path to a saved Playwright storage-state file.
        url: page that is only reachable when logged in.
        keyword: when non-empty, the session counts as valid only if this
            text occurs in the page content.
        url_exact: used only when ``keyword`` is empty; if True the final
            URL must equal ``url``, otherwise ``url`` must be contained in it.

    Returns:
        True if the state file is missing or the session looks expired.
    """
    if not storage_state.exists():
        return True

    # The ``with`` block stops Playwright even if navigation raises.
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True, slow_mo=SLOW_MO)
        context = browser.new_context(storage_state=storage_state)
        page = context.new_page()
        page.goto(url)
        # Give client-side redirects a moment before reading the page.
        time.sleep(1)
        d_url = page.url
        content = page.content()

    if keyword:
        return keyword not in content
    if url_exact:
        return d_url != url
    return url not in d_url


def renew_comb(comb: list[str], auth_folder: str = "./.auth") -> None:
    """Log in to every site in ``comb`` and save the combined cookies.

    All logins share one browser context, whose storage state is written to
    ``<auth_folder>/<site1>.<site2>..._state.json``. The site URL constants
    are imported at module level according to ``DATASET``, so naming a site
    that does not belong to the active dataset raises ``NameError``.

    Args:
        comb: site names to log in to (e.g. ``["gitlab", "shopping"]``).
        auth_folder: directory that receives the state file.
    """
    # Make sure the target directory exists before Playwright writes into
    # it; harmless if Playwright would have created it itself.
    os.makedirs(auth_folder, exist_ok=True)

    # The ``with`` block stops Playwright even if a login step raises.
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=HEADLESS)
        context = browser.new_context()
        page = context.new_page()

        if "shopping" in comb:
            username = ACCOUNTS["shopping"]["username"]
            password = ACCOUNTS["shopping"]["password"]
            page.goto(f"{SHOPPING}/customer/account/login/")
            page.get_by_label("Email", exact=True).fill(username)
            page.get_by_label("Password", exact=True).fill(password)
            page.get_by_role("button", name="Sign In").click()

        if "reddit" in comb:
            username = ACCOUNTS["reddit"]["username"]
            password = ACCOUNTS["reddit"]["password"]
            page.goto(f"{REDDIT}/login")
            page.get_by_label("Username").fill(username)
            page.get_by_label("Password").fill(password)
            page.get_by_role("button", name="Log in").click()

        if "classifieds" in comb:
            username = ACCOUNTS["classifieds"]["username"]
            password = ACCOUNTS["classifieds"]["password"]
            page.goto(f"{CLASSIFIEDS}/index.php?page=login")
            page.locator("#email").fill(username)
            page.locator("#password").fill(password)
            page.get_by_role("button", name="Log in").click()

        if "shopping_admin" in comb:
            username = ACCOUNTS["shopping_admin"]["username"]
            password = ACCOUNTS["shopping_admin"]["password"]
            page.goto(f"{SHOPPING_ADMIN}")
            page.get_by_placeholder("user name").fill(username)
            page.get_by_placeholder("password").fill(password)
            page.get_by_role("button", name="Sign in").click()

        if "gitlab" in comb:
            username = ACCOUNTS["gitlab"]["username"]
            password = ACCOUNTS["gitlab"]["password"]
            page.goto(f"{GITLAB}/users/sign_in")
            page.get_by_test_id("username-field").click()
            page.get_by_test_id("username-field").fill(username)
            page.get_by_test_id("username-field").press("Tab")
            page.get_by_test_id("password-field").fill(password)
            page.get_by_test_id("sign-in-button").click()

        context.storage_state(
            path=f"{auth_folder}/{'.'.join(comb)}_state.json"
        )


def get_site_comb_from_filepath(file_path: str) -> list[str]:
    """Return the site names encoded in a state-file name.

    The file name is expected to look like ``<site>.<site>_state.json``:
    everything after the last underscore is dropped and the remainder is
    split on dots, so ``gitlab.shopping_admin_state.json`` yields
    ``["gitlab", "shopping_admin"]``.
    """
    comb = os.path.basename(file_path).rsplit("_", 1)[0].split(".")
    return comb


def main(auth_folder: str = "./.auth") -> None:
    """Renew cookies for every site and site pair, then verify them.

    Raises:
        AssertionError: if any state file in ``auth_folder`` is found to be
            expired for one of the sites it covers.
    """
    pairs = list(combinations(SITES, 2))

    renewals = []
    with ThreadPoolExecutor(max_workers=8) as executor:
        for pair in pairs:
            # Auth doesn't work on this pair as they share the same cookie
            if "reddit" in pair and (
                "shopping" in pair or "shopping_admin" in pair
            ):
                continue
            comb = sorted(pair)
            renewals.append(
                (
                    comb,
                    executor.submit(
                        renew_comb, comb, auth_folder=auth_folder
                    ),
                )
            )

        for site in SITES:
            renewals.append(
                (
                    [site],
                    executor.submit(
                        renew_comb, [site], auth_folder=auth_folder
                    ),
                )
            )

    # Surface login failures instead of discarding them with the futures.
    # Renewal stays best-effort; the expiry check below is what fails.
    for comb, future in renewals:
        error = future.exception()
        if error is not None:
            print(f"Failed to renew {'.'.join(comb)}: {error!r}")

    # parallel checking if the cookies are expired
    checks = []
    with ThreadPoolExecutor(max_workers=8) as executor:
        for c_file in glob.glob(f"{auth_folder}/*.json"):
            for cur_site in get_site_comb_from_filepath(c_file):
                idx = SITES.index(cur_site)
                future = executor.submit(
                    is_expired,
                    Path(c_file),
                    URLS[idx],
                    KEYWORDS[idx],
                    EXACT_MATCH[idx],
                )
                # Keep the file next to its future: a file covering two
                # sites produces two futures, so a positional index into
                # the file list would name the wrong file.
                checks.append((c_file, future))

    for c_file, future in checks:
        if future.result():
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError(f"Cookie {c_file} expired.")
+ + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--site_list", nargs="+", default=[]) + parser.add_argument("--auth_folder", type=str, default="./.auth") + args = parser.parse_args() + if not args.site_list: + main() + else: + renew_comb(args.site_list, auth_folder=args.auth_folder) diff --git a/VAB-WebArena-Lite/browser_env/constants.py b/VAB-WebArena-Lite/browser_env/constants.py new file mode 100644 index 0000000..7feb253 --- /dev/null +++ b/VAB-WebArena-Lite/browser_env/constants.py @@ -0,0 +1,324 @@ +import re +from typing import Literal + +ROLES = ( + "alert", + "alertdialog", + "application", + "article", + "banner", + "blockquote", + "button", + "caption", + "cell", + "checkbox", + "code", + "columnheader", + "combobox", + "complementary", + "contentinfo", + "definition", + "deletion", + "dialog", + "directory", + "document", + "emphasis", + "feed", + "figure", + "form", + "generic", + "grid", + "gridcell", + "group", + "heading", + "img", + "insertion", + "link", + "list", + "listbox", + "listitem", + "log", + "main", + "marquee", + "math", + "meter", + "menu", + "menubar", + "menuitem", + "menuitemcheckbox", + "menuitemradio", + "navigation", + "none", + "note", + "option", + "paragraph", + "presentation", + "progressbar", + "radio", + "radiogroup", + "region", + "row", + "rowgroup", + "rowheader", + "scrollbar", + "search", + "searchbox", + "separator", + "slider", + "spinbutton", + "status", + "strong", + "subscript", + "superscript", + "switch", + "tab", + "table", + "tablist", + "tabpanel", + "term", + "textbox", + "time", + "timer", + "toolbar", + "tooltip", + "tree", + "treegrid", + "treeitem", +) + +SPECIAL_LOCATORS = ( + "alt_text", + "label", + "placeholder", +) + +ASCII_CHARSET = "".join(chr(x) for x in range(32, 128)) +FREQ_UNICODE_CHARSET = "".join(chr(x) for x in range(129, 130000)) +UTTERANCE_MAX_LENGTH = 8192 +ATTRIBUTE_MAX_LENGTH = 256 +TEXT_MAX_LENGTH = 256 +TYPING_MAX_LENGTH = 64 +URL_MAX_LENGTH 
= 256 +MAX_ELEMENT_INDEX_IN_VIEWPORT = 10 +MAX_ELEMENT_ID = 1000 +MAX_ANSWER_LENGTH = 512 + +MIN_REF = -1000000 +MAX_REF = 1000000 + +WINDOW_WIDTH = 500 +WINDOW_HEIGHT = 240 +TASK_WIDTH = 160 +TASK_HEIGHT = 210 + +FLIGHT_WINDOW_WIDTH = 600 +FLIGHT_WINDOW_HEIGHT = 700 +FLIGHT_TASK_WIDTH = 375 +FLIGHT_TASK_HEIGHT = 667 +MAX_PAGE_NUMBER = 10 + +SPECIAL_KEYS = ( + "Enter", + "Tab", + "Control", + "Shift", + "Meta", + "Backspace", + "Delete", + "Escape", + "ArrowUp", + "ArrowDown", + "ArrowLeft", + "ArrowRight", + "PageDown", + "PageUp", + "Meta+a", +) + +SPECIAL_KEY_MAPPINGS = { + "backquote": "Backquote", + "minus": "Minus", + "equal": "Equal", + "backslash": "Backslash", + "backspace": "Backspace", + "meta": "Meta", + "tab": "Tab", + "delete": "Delete", + "escape": "Escape", + "arrowdown": "ArrowDown", + "end": "End", + "enter": "Enter", + "home": "Home", + "insert": "Insert", + "pagedown": "PageDown", + "pageup": "PageUp", + "arrowright": "ArrowRight", + "arrowup": "ArrowUp", + "f1": "F1", + "f2": "F2", + "f3": "F3", + "f4": "F4", + "f5": "F5", + "f6": "F6", + "f7": "F7", + "f8": "F8", + "f9": "F9", + "f10": "F10", + "f11": "F11", + "f12": "F12", +} + +RolesType = Literal[ + "alert", + "alertdialog", + "application", + "article", + "banner", + "blockquote", + "button", + "caption", + "cell", + "checkbox", + "code", + "columnheader", + "combobox", + "complementary", + "contentinfo", + "definition", + "deletion", + "dialog", + "directory", + "document", + "emphasis", + "feed", + "figure", + "form", + "generic", + "grid", + "gridcell", + "group", + "heading", + "img", + "insertion", + "link", + "list", + "listbox", + "listitem", + "log", + "main", + "marquee", + "math", + "meter", + "menu", + "menubar", + "menuitem", + "menuitemcheckbox", + "menuitemradio", + "navigation", + "none", + "note", + "option", + "paragraph", + "presentation", + "progressbar", + "radio", + "radiogroup", + "region", + "row", + "rowgroup", + "rowheader", + "scrollbar", + "search", + 
"searchbox", + "separator", + "slider", + "spinbutton", + "status", + "strong", + "subscript", + "superscript", + "switch", + "tab", + "table", + "tablist", + "tabpanel", + "term", + "textbox", + "time", + "timer", + "toolbar", + "tooltip", + "tree", + "treegrid", + "treeitem", + "alt_text", + "label", + "placeholder", +] + +MAX_VANILLA_STR_LENGTH = 1000 + +PLAYWRIGHT_LOCATORS = ( + "get_by_role", + "get_by_text", + "get_by_label", + "get_by_placeholder", + "get_by_alt_text", + "get_by_title", + "get_by_test_id", + "filter", + "frame_locator", + "locator", +) + +PLAYWRIGHT_ACTIONS = ( + "fill", + "check", + "select_option", + "click", + "hover", + "dclick", + "type", + "focus", + "goto", + "press", + "scroll", +) + +IGNORED_ACTREE_PROPERTIES = ( + "focusable", + "editable", + "readonly", + "level", + "settable", + "multiline", + "invalid", +) + +INJECTED_ATTR_NAME = "aria-roledescription" +BID_ATTR = "bid" # the attribute name for extra meta data +BID_EXPR = r"([-0-9]+)" +FLOAT_EXPR = r"([+-]?(?:[0-9]*[.])?[0-9]+)" +BOOL_EXPR = r"([01])" + +DATA_REGEXP = re.compile( + BID_EXPR + + r"_" + + FLOAT_EXPR + + r"_" + + FLOAT_EXPR + + r"_" + + FLOAT_EXPR + + r"_" + + FLOAT_EXPR + + r"_" + + FLOAT_EXPR + + r"_" + + FLOAT_EXPR + + r"_" + + BOOL_EXPR + + r"_" + + r"(.*)" +) + +IN_VIEWPORT_RATIO_THRESHOLD = 0.6 \ No newline at end of file diff --git a/VAB-WebArena-Lite/browser_env/env_config.py b/VAB-WebArena-Lite/browser_env/env_config.py new file mode 100644 index 0000000..3ec81a1 --- /dev/null +++ b/VAB-WebArena-Lite/browser_env/env_config.py @@ -0,0 +1,97 @@ +# websites domain +import os + +DATASET = os.environ["DATASET"] +if DATASET not in ["webarena", "visualwebarena"]: + raise ValueError("Please set the DATASET environment variable, the possible options are `webarena`, `visualwebarena` and `miniwob++`") + +# WebArena +if DATASET == "webarena": + REDDIT = os.environ.get("REDDIT", "") + SHOPPING = os.environ.get("SHOPPING", "") + SHOPPING_ADMIN = 
os.environ.get("SHOPPING_ADMIN", "") + GITLAB = os.environ.get("GITLAB", "") + WIKIPEDIA = os.environ.get("WIKIPEDIA", "") + MAP = os.environ.get("MAP", "") + HOMEPAGE = os.environ.get("HOMEPAGE", "") + assert ( + REDDIT + and SHOPPING + and SHOPPING_ADMIN + and GITLAB + and WIKIPEDIA + and MAP + and HOMEPAGE + ), ( + f"Please setup the URLs to each site. Current: \n" + + f"Reddit: {REDDIT}\n" + + f"Shopping: {SHOPPING}\n" + + f"Shopping Admin: {SHOPPING_ADMIN}\n" + + f"Gitlab: {GITLAB}\n" + + f"Wikipedia: {WIKIPEDIA}\n" + + f"Map: {MAP}\n" + + f"Homepage: {HOMEPAGE}\n" + ) + + URL_MAPPINGS = { + REDDIT: "http://reddit.com", + SHOPPING: "http://onestopmarket.com", + SHOPPING_ADMIN: "http://luma.com/admin", + GITLAB: "http://gitlab.com", + WIKIPEDIA: "http://wikipedia.org", + MAP: "http://openstreetmap.org", + HOMEPAGE: "http://homepage.com", + } + +elif DATASET == "visualwebarena": + REDDIT = os.environ.get("REDDIT", "") + SHOPPING = os.environ.get("SHOPPING", "") + WIKIPEDIA = os.environ.get("WIKIPEDIA", "") + HOMEPAGE = os.environ.get("HOMEPAGE", "") + CLASSIFIEDS = os.environ.get("CLASSIFIEDS", "") + CLASSIFIEDS_RESET_TOKEN = os.environ.get("CLASSIFIEDS_RESET_TOKEN", "") + REDDIT_RESET_URL = os.environ.get("REDDIT_RESET_URL", "") + + assert ( + REDDIT + and SHOPPING + and WIKIPEDIA + and HOMEPAGE + and CLASSIFIEDS + and CLASSIFIEDS_RESET_TOKEN + ), ( + f"Please setup the URLs and tokens to each site. 
Current: " + + f"Reddit: {REDDIT}" + + f"Shopping: {SHOPPING}" + + f"Wikipedia: {WIKIPEDIA}" + + f"Homepage: {HOMEPAGE}" + + f"Classifieds: {CLASSIFIEDS}" + + f"Classifieds reset token: {CLASSIFIEDS_RESET_TOKEN}" + ) + + URL_MAPPINGS = { + REDDIT: "http://reddit.com", + SHOPPING: "http://onestopmarket.com", + WIKIPEDIA: "http://wikipedia.org", + HOMEPAGE: "http://homepage.com", + CLASSIFIEDS: "http://classifieds.com", + } + +else: + raise ValueError(f"Dataset not implemented: {DATASET}") + + +ACCOUNTS = { + "reddit": {"username": "MarvelsGrantMan136", "password": "test1234"}, + "shopping": { + "username": "emma.lopez@gmail.com", + "password": "Password.123", + }, + "classifieds": { + "username": "blake.sullivan@gmail.com", + "password": "Password.123", + }, + "shopping_site_admin": {"username": "admin", "password": "admin1234"}, + "shopping_admin": {"username": "admin", "password": "admin1234"}, + "gitlab": {"username": "byteblaze", "password": "hello1234"}, +} \ No newline at end of file diff --git a/VAB-WebArena-Lite/new/envs.py b/VAB-WebArena-Lite/browser_env/envs.py similarity index 97% rename from VAB-WebArena-Lite/new/envs.py rename to VAB-WebArena-Lite/browser_env/envs.py index 2dfe7dc..fa102b2 100644 --- a/VAB-WebArena-Lite/new/envs.py +++ b/VAB-WebArena-Lite/browser_env/envs.py @@ -94,6 +94,7 @@ class ScriptBrowserEnv(Env[dict[str, Observation], Action]): save_trace_enabled: bool = False, sleep_after_execution: float = 0.0, captioning_fn=None, + proxy_url: str = "", ): # TODO: make Space[Action] = ActionSpace self.action_space = get_action_space() # type: ignore[assignment] @@ -104,6 +105,7 @@ class ScriptBrowserEnv(Env[dict[str, Observation], Action]): self.viewport_size = viewport_size self.save_trace_enabled = save_trace_enabled self.sleep_after_execution = sleep_after_execution + self.proxy_url = proxy_url match observation_type: case "html" | "accessibility_tree" | "accessibility_tree_with_captioner" | "webrl": @@ -187,6 +189,12 @@ class 
ScriptBrowserEnv(Env[dict[str, Observation], Action]): storage_state=storage_state, geolocation=geolocation, device_scale_factor=1, + proxy={ + "server": self.proxy_url, + "bypass": "127.0.0.1,localhost", + } + if self.proxy_url + else None, ) if self.save_trace_enabled: self.context.tracing.start(screenshots=True, snapshots=True) diff --git a/VAB-WebArena-Lite/new/helper_functions_browser.py b/VAB-WebArena-Lite/browser_env/helper_functions.py similarity index 100% rename from VAB-WebArena-Lite/new/helper_functions_browser.py rename to VAB-WebArena-Lite/browser_env/helper_functions.py diff --git a/VAB-WebArena-Lite/new/html_tools/__init__.py b/VAB-WebArena-Lite/browser_env/html_tools/__init__.py similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/__init__.py rename to VAB-WebArena-Lite/browser_env/html_tools/__init__.py diff --git a/VAB-WebArena-Lite/new/html_tools/configs/__init__.py b/VAB-WebArena-Lite/browser_env/html_tools/configs/__init__.py similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/configs/__init__.py rename to VAB-WebArena-Lite/browser_env/html_tools/configs/__init__.py diff --git a/VAB-WebArena-Lite/new/html_tools/configs/config.py b/VAB-WebArena-Lite/browser_env/html_tools/configs/config.py similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/configs/config.py rename to VAB-WebArena-Lite/browser_env/html_tools/configs/config.py diff --git a/VAB-WebArena-Lite/new/html_tools/configs/html_prompt.py b/VAB-WebArena-Lite/browser_env/html_tools/configs/html_prompt.py similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/configs/html_prompt.py rename to VAB-WebArena-Lite/browser_env/html_tools/configs/html_prompt.py diff --git a/VAB-WebArena-Lite/new/html_tools/fetch.py b/VAB-WebArena-Lite/browser_env/html_tools/fetch.py similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/fetch.py rename to VAB-WebArena-Lite/browser_env/html_tools/fetch.py diff --git 
a/VAB-WebArena-Lite/new/html_tools/html_parser.py b/VAB-WebArena-Lite/browser_env/html_tools/html_parser.py similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/html_parser.py rename to VAB-WebArena-Lite/browser_env/html_tools/html_parser.py diff --git a/VAB-WebArena-Lite/new/html_tools/identifier.py b/VAB-WebArena-Lite/browser_env/html_tools/identifier.py similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/identifier.py rename to VAB-WebArena-Lite/browser_env/html_tools/identifier.py diff --git a/VAB-WebArena-Lite/new/html_tools/prompt.py b/VAB-WebArena-Lite/browser_env/html_tools/prompt.py similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/prompt.py rename to VAB-WebArena-Lite/browser_env/html_tools/prompt.py diff --git a/VAB-WebArena-Lite/new/html_tools/scripts/__init__.py b/VAB-WebArena-Lite/browser_env/html_tools/scripts/__init__.py similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/scripts/__init__.py rename to VAB-WebArena-Lite/browser_env/html_tools/scripts/__init__.py diff --git a/VAB-WebArena-Lite/new/html_tools/scripts/clickable_checker.js b/VAB-WebArena-Lite/browser_env/html_tools/scripts/clickable_checker.js similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/scripts/clickable_checker.js rename to VAB-WebArena-Lite/browser_env/html_tools/scripts/clickable_checker.js diff --git a/VAB-WebArena-Lite/new/html_tools/scripts/element_info.js b/VAB-WebArena-Lite/browser_env/html_tools/scripts/element_info.js similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/scripts/element_info.js rename to VAB-WebArena-Lite/browser_env/html_tools/scripts/element_info.js diff --git a/VAB-WebArena-Lite/new/html_tools/scripts/label.js b/VAB-WebArena-Lite/browser_env/html_tools/scripts/label.js similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/scripts/label.js rename to VAB-WebArena-Lite/browser_env/html_tools/scripts/label.js diff --git 
a/VAB-WebArena-Lite/new/html_tools/scripts/label_marker.js b/VAB-WebArena-Lite/browser_env/html_tools/scripts/label_marker.js similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/scripts/label_marker.js rename to VAB-WebArena-Lite/browser_env/html_tools/scripts/label_marker.js diff --git a/VAB-WebArena-Lite/new/html_tools/scripts/prepare.js b/VAB-WebArena-Lite/browser_env/html_tools/scripts/prepare.js similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/scripts/prepare.js rename to VAB-WebArena-Lite/browser_env/html_tools/scripts/prepare.js diff --git a/VAB-WebArena-Lite/new/html_tools/utils.py b/VAB-WebArena-Lite/browser_env/html_tools/utils.py similarity index 100% rename from VAB-WebArena-Lite/new/html_tools/utils.py rename to VAB-WebArena-Lite/browser_env/html_tools/utils.py diff --git a/VAB-WebArena-Lite/browser_env/javascript/frame_mark_elements.js b/VAB-WebArena-Lite/browser_env/javascript/frame_mark_elements.js new file mode 100644 index 0000000..2f26af6 --- /dev/null +++ b/VAB-WebArena-Lite/browser_env/javascript/frame_mark_elements.js @@ -0,0 +1,192 @@ +/** + * Go through all DOM elements in the frame (including shadowDOMs), give them unique browsergym + * identifiers (bid), and store custom data in the aria-roledescription attribute. 
+ */ +var { innerWidth: windowWidth, innerHeight: windowHeight } = window; +var scrollX = window.scrollX || document.documentElement.scrollLeft; +var scrollY = window.scrollY || document.documentElement.scrollTop; + +([parent_bid, bid_attr_name, iframe_position, super_iframe_offset]) => { + + // standard html tags + // https://www.w3schools.com/tags/ + const html_tags = [ + "a", "abbr", "acronym", "address", "applet", "area", "article", "aside", "audio", + "b", "base", "basefont", "bdi", "bdo", "big", "blockquote", "body", "br", "button", + "canvas", "caption", "center", "cite", "code", "col", "colgroup", "data", "datalist", + "dd", "del", "details", "dfn", "dialog", "dir", "div", "dl", "dt", "em", "embed", + "fieldset", "figcaption", "figure", "font", "footer", "form", "frame", "frameset", + "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html", "i", + "iframe", "img", "input", "ins", "kbd", "label", "legend", "li", "link", "main", + "map", "mark", "menu", "meta", "meter", "nav", "noframes", "noscript", "object", + "ol", "optgroup", "option", "output", "p", "param", "picture", "pre", "progress", + "q", "rp", "rt", "ruby", "s", "samp", "script", "search", "section", "select", + "small", "source", "span", "strike", "strong", "style", "sub", "summary", "sup", + "svg", "table", "tbody", "td", "template", "textarea", "tfoot", "th", "thead", + "time", "title", "tr", "track", "tt", "u", "ul", "var", "video", "wbr" + ]; + + if (super_iframe_offset == null) { + + iframe_offset = { x: scrollX, y: scrollY, right: windowWidth, bottom: windowHeight }; + } + else { + [super_x, super_y, super_right, super_bottom] = [super_iframe_offset["x"], super_iframe_offset["y"], super_iframe_offset["right"], super_iframe_offset["bottom"]]; + + x = Math.max(-iframe_position.x, 0); + y = Math.max(-iframe_position.y, 0); + right = Math.min(...[super_right, windowWidth, super_right - iframe_position.x]); + bottom = Math.min(...[super_bottom, windowHeight, super_bottom - 
iframe_position.y]); + iframe_offset = { x: x, y: y, right: right, bottom: bottom }; + } + + let browsergym_first_visit = false; + // if no yet set, set the frame (local) element counter to 0 + if (!("browsergym_frame_elem_counter" in window)) { + window.browsergym_frame_elem_counter = 0; + browsergym_first_visit = true; + } + + // get all DOM elements in the current frame (does not include elements in shadowDOMs) + let elements = Array.from(document.querySelectorAll('*')); + i = 0; + while (i < elements.length) { + const elem = elements[i]; + // add shadowDOM elements to the elements array, in such a way that order is preserved + // TODO: do we really need the order preserved? + if (elem.shadowRoot !== null) { + elements = new Array( + ...Array.prototype.slice.call(elements, 0, i + 1), + ...Array.from(elem.shadowRoot.querySelectorAll("*")), + ...Array.prototype.slice.call(elements, i + 1) + ); + } + i++; + // we will mark only standard HTML tags + if (!elem.tagName || !html_tags.includes(elem.tagName.toLowerCase())) { + // console.log(`Skipping element ${elem.outerHTML}`) + continue; // stop and move on to the next element + } + // console.log(`Processing element ${elem.outerHTML}`) + // write dynamic element values to the DOM + if (typeof elem.value !== 'undefined') { + elem.setAttribute("value", elem.value); + } + // write dynamic checked properties to the DOM + if (typeof elem.checked !== 'undefined') { + if (elem.checked === true) { + elem.setAttribute("checked", ""); + } + else { + elem.removeAttribute("checked"); + } + } + // add the element global id to a custom HTML attribute + // https://playwright.dev/docs/locators#locate-by-test-id + // recover the element id if it has one already, else compute a new element id + let elem_global_bid; + if (elem.hasAttribute(bid_attr_name)) { + // throw an error if the attribute is already set while this is the first visit of the page + if (browsergym_first_visit) { + throw new Error(`Attribute ${bid_attr_name} already 
used in element ${elem.outerHTML}`); + } + elem_global_bid = elem.getAttribute(bid_attr_name); + } + else { + let elem_local_id = window.browsergym_frame_elem_counter++; + if (parent_bid == "") { + elem_global_bid = `${elem_local_id}`; + } + else { + elem_global_bid = `${parent_bid}-${elem_local_id}`; + } + elem.setAttribute(bid_attr_name, `${elem_global_bid}`); + } + // Hack: store custom data inside the aria-roledescription attribute (will be available in DOM and AXTree) + // - elem_global_bid: global element identifier (unique over multiple frames) + // TODO: add more data if needed (x, y coordinates, bounding box, is_visible, is_clickable etc.) + + let [rect, is_in_viewport] = getElementPositionInfo(elem, iframe_offset, iframe_position); + let left = (rect.left + iframe_position.x).toString(); + let top = (rect.top + iframe_position.y ).toString(); + let right = (rect.right + iframe_position.x ).toString(); + let bottom = (rect.bottom + iframe_position.y).toString(); + let center_x = ((rect.left + rect.right) / 2 + iframe_position.x).toString(); + let center_y = ((rect.top + rect.bottom) / 2 + iframe_position.y).toString(); + + elem.setAttribute("browsergym_center", `(${center_x}, ${center_y})`); + elem.setAttribute("browsergym_bounding_box", `(${left}, ${top}, ${right}, ${bottom})`); + elem.setAttribute("browsergym_is_in_viewport", `${is_in_viewport}`); + + let original_content = ""; + if (elem.hasAttribute("aria-roledescription")) { + original_content = elem.getAttribute("aria-roledescription"); + } + let new_content = `${elem_global_bid}_${left}_${top}_${center_x}_${center_y}_${right}_${bottom}_${is_in_viewport}_${original_content}` + elem.setAttribute("aria-roledescription", new_content); + + } + return iframe_offset; + +} +function getElementPositionInfo(element, iframe_offset, iframe_position) { + var rect = element.getBoundingClientRect(); + let x = (rect.left + rect.right) / 2 ; + let y = (rect.top + rect.bottom) / 2 ; + //loop over element ancestors 
(parent) and refine iframe offset to be the most precise possible + var parent = element.parentElement; + parent_iframe_offset = { x: 0, y: 0, right: windowWidth, bottom: windowHeight }; + while (parent !== null) { + var parent_rect = parent.getBoundingClientRect(); + parent_iframe_offset["x"] = Math.max(parent_rect.left , parent_iframe_offset["x"] ); + parent_iframe_offset["y"] = Math.max(parent_rect.top , parent_iframe_offset["y"] ); + parent_iframe_offset["right"] = Math.min(parent_rect.right , parent_iframe_offset["right"] ); + parent_iframe_offset["bottom"] = Math.min(parent_rect.bottom , parent_iframe_offset["bottom"] ); + parent = parent.parentElement; + } + + var is_in_viewport = ( + x >= iframe_offset["x"] && + y >= iframe_offset["y"] && + x <= iframe_offset["right"] && + y <= iframe_offset["bottom"] + ); + //this features is broken for the moment + var NotBehindParent = ( + x >= parent_iframe_offset["x"] && + y >= parent_iframe_offset["y"] && + x <= parent_iframe_offset["right"] && + y <= parent_iframe_offset["bottom"] + ); + + var isVisible = (typeof element.offsetWidth === 'undefined' || typeof element.offsetHeight === 'undefined') || (element.offsetWidth > 0 && element.offsetHeight > 0); + + // Return true if the element is both in the viewport and has non-zero dimensions + return [rect, (is_in_viewport && isVisible && IsInFront(element))? 
1 : 0]; +} + + +function IsInFront(element){ + var rect = element.getBoundingClientRect(); + var x = (rect.left + rect.right) / 2 ; + var y = (rect.top + rect.bottom) / 2 ; + var newElement = elementFromPoint(x, y); //return the element in the foreground at position (x,y) + if(newElement){ + if(newElement === element) + return true; + } + return false; +} + +function elementFromPoint(x, y) { + let node = document.elementFromPoint(x, y); + + let child = node?.shadowRoot?.elementFromPoint(x, y); + + while (child && child !== node) { + node = child; + child = node?.shadowRoot?.elementFromPoint(x, y); + } + + return child || node; + } \ No newline at end of file diff --git a/VAB-WebArena-Lite/browser_env/javascript/frame_unmark_elements.js b/VAB-WebArena-Lite/browser_env/javascript/frame_unmark_elements.js new file mode 100644 index 0000000..78adadd --- /dev/null +++ b/VAB-WebArena-Lite/browser_env/javascript/frame_unmark_elements.js @@ -0,0 +1,41 @@ +/** + * Go through all DOM elements in the frame (including shadowDOMs), + * and cleanup previously stored data in the aria-roledescription attribute. + */ +() => { + // get all DOM elements in the current frame (does not include elements in shadowDOMs) + let elements = Array.from(document.querySelectorAll('*')); + let i = 0; + while (i < elements.length) { + const elem = elements[i]; + // add shadowDOM elements to the elements array, in such a way that order is preserved + // TODO: do we really need the order preserved? 
+ if (elem.shadowRoot !== null) { + elements = new Array( + ...Array.prototype.slice.call(elements, 0, i + 1), + ...Array.from(elem.shadowRoot.querySelectorAll("*")), + ...Array.prototype.slice.call(elements, i + 1) + ); + } + i++; + // Hack: remove custom data stored inside the aria-roledescription tag + // - elem_global_id: global browsergym identifier + if (elem.hasAttribute("aria-roledescription")) { + let content = elem.getAttribute("aria-roledescription"); + // TODO: handle more data if needed + let n_data_items = 8; // bid, bbox_left, bbox_top, center_x, center_y, bbox_right, bbox_bottom, is_in_viewport + let post_data_index = 0; + for (let j = 0 ; j < n_data_items ; j++) { + post_data_index = content.indexOf("_", post_data_index) + 1; + } + original_content = content.substring(post_data_index); + if (original_content) { + elem.setAttribute("aria-roledescription", original_content); + } + else { + elem.removeAttribute("aria-roledescription"); + } + + } + } +} \ No newline at end of file diff --git a/VAB-WebArena-Lite/new/processors.py b/VAB-WebArena-Lite/browser_env/processors.py similarity index 99% rename from VAB-WebArena-Lite/new/processors.py rename to VAB-WebArena-Lite/browser_env/processors.py index d3f422d..ad344de 100644 --- a/VAB-WebArena-Lite/new/processors.py +++ b/VAB-WebArena-Lite/browser_env/processors.py @@ -1114,7 +1114,7 @@ class ImageObservationProcessor(ObservationProcessor): try: browser_info = self.fetch_browser_info(page) except Exception: - page.wait_for_load_state("load", timeout=500) + page.wait_for_load_state("load", timeout=30000) # 500->30000, modified by yuyr browser_info = self.fetch_browser_info(page) self.browser_config = browser_info["config"] diff --git a/VAB-WebArena-Lite/browser_env/py.typed b/VAB-WebArena-Lite/browser_env/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/VAB-WebArena-Lite/browser_env/trajectory.py b/VAB-WebArena-Lite/browser_env/trajectory.py new file mode 100644 index 0000000..1c4c410 
--- /dev/null +++ b/VAB-WebArena-Lite/browser_env/trajectory.py @@ -0,0 +1,6 @@ +from typing import Union + +from .actions import Action +from .utils import StateInfo + +Trajectory = list[Union[StateInfo, Action]] diff --git a/VAB-WebArena-Lite/browser_env/utils.py b/VAB-WebArena-Lite/browser_env/utils.py new file mode 100644 index 0000000..957e15f --- /dev/null +++ b/VAB-WebArena-Lite/browser_env/utils.py @@ -0,0 +1,106 @@ +import base64 +from dataclasses import dataclass +from io import BytesIO +from typing import Any, Dict, TypedDict, Union + +import numpy as np +import numpy.typing as npt +from beartype import beartype +from PIL import Image + +try: + from vertexai.preview.generative_models import Image as VertexImage +except: + print('Google Cloud not set up, skipping import of vertexai.preview.generative_models.Image') + + +@dataclass +class DetachedPage: + url: str + content: str # html + + +@beartype +def png_bytes_to_numpy(png: bytes) -> npt.NDArray[np.uint8]: + """Convert png bytes to numpy array + + Example: + + >>> fig = go.Figure(go.Scatter(x=[1], y=[1])) + >>> plt.imshow(png_bytes_to_numpy(fig.to_image('png'))) + """ + return np.array(Image.open(BytesIO(png))) + + +def pil_to_b64(img: Image.Image) -> str: + with BytesIO() as image_buffer: + img.save(image_buffer, format="PNG") + byte_data = image_buffer.getvalue() + img_b64 = base64.b64encode(byte_data).decode("utf-8") + img_b64 = "data:image/png;base64," + img_b64 + return img_b64 + + +def pil_to_vertex(img: Image.Image) -> str: + with BytesIO() as image_buffer: + img.save(image_buffer, format="PNG") + byte_data = image_buffer.getvalue() + img_vertex = VertexImage.from_bytes(byte_data) + return img_vertex + + +class DOMNode(TypedDict): + nodeId: str + nodeType: str + nodeName: str + nodeValue: str + attributes: str + backendNodeId: str + parentId: str + childIds: list[str] + cursor: int + union_bound: list[float] | None + center: list[float] | None + + +class AccessibilityTreeNode(TypedDict): + 
nodeId: str + ignored: bool + role: dict[str, Any] + chromeRole: dict[str, Any] + name: dict[str, Any] + properties: list[dict[str, Any]] + childIds: list[str] + parentId: str + backendDOMNodeId: int + frameId: str + bound: list[float] | None + union_bound: list[float] | None + offsetrect_bound: list[float] | None + center: list[float] | None + + +class BrowserConfig(TypedDict): + win_upper_bound: float + win_left_bound: float + win_width: float + win_height: float + win_right_bound: float + win_lower_bound: float + device_pixel_ratio: float + + +class BrowserInfo(TypedDict): + DOMTree: dict[str, Any] + config: BrowserConfig + + +AccessibilityTree = list[AccessibilityTreeNode] +DOMTree = list[DOMNode] + +Observation = str | npt.NDArray[np.uint8] + + +class StateInfo(TypedDict): + observation: dict[str, Observation] + info: Dict[str, Any] diff --git a/VAB-WebArena-Lite/coco_images/000000000285.jpg b/VAB-WebArena-Lite/coco_images/000000000285.jpg new file mode 100644 index 0000000..7d76080 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000000285.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000022371.jpg b/VAB-WebArena-Lite/coco_images/000000022371.jpg new file mode 100644 index 0000000..6a044d3 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000022371.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000023781.jpg b/VAB-WebArena-Lite/coco_images/000000023781.jpg new file mode 100644 index 0000000..224dd31 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000023781.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000024567.jpg b/VAB-WebArena-Lite/coco_images/000000024567.jpg new file mode 100644 index 0000000..60237fe Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000024567.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000029675.jpg b/VAB-WebArena-Lite/coco_images/000000029675.jpg new file mode 100644 index 0000000..915fcc0 Binary files /dev/null and 
b/VAB-WebArena-Lite/coco_images/000000029675.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000029984.jpg b/VAB-WebArena-Lite/coco_images/000000029984.jpg new file mode 100644 index 0000000..49f1c5a Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000029984.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000039551.jpg b/VAB-WebArena-Lite/coco_images/000000039551.jpg new file mode 100644 index 0000000..d6244d4 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000039551.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000048153.jpg b/VAB-WebArena-Lite/coco_images/000000048153.jpg new file mode 100644 index 0000000..39a9c6f Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000048153.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000048396.jpg b/VAB-WebArena-Lite/coco_images/000000048396.jpg new file mode 100644 index 0000000..7c12b86 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000048396.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000048924.jpg b/VAB-WebArena-Lite/coco_images/000000048924.jpg new file mode 100644 index 0000000..ad02594 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000048924.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000050331.jpg b/VAB-WebArena-Lite/coco_images/000000050331.jpg new file mode 100644 index 0000000..daaf301 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000050331.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000058539.jpg b/VAB-WebArena-Lite/coco_images/000000058539.jpg new file mode 100644 index 0000000..8fae15a Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000058539.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000058705.jpg b/VAB-WebArena-Lite/coco_images/000000058705.jpg new file mode 100644 index 0000000..7a84670 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000058705.jpg differ diff --git 
a/VAB-WebArena-Lite/coco_images/000000060899.jpg b/VAB-WebArena-Lite/coco_images/000000060899.jpg new file mode 100644 index 0000000..1457bd7 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000060899.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000064574.jpg b/VAB-WebArena-Lite/coco_images/000000064574.jpg new file mode 100644 index 0000000..a456116 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000064574.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000064718.jpg b/VAB-WebArena-Lite/coco_images/000000064718.jpg new file mode 100644 index 0000000..8d025dc Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000064718.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000066771.jpg b/VAB-WebArena-Lite/coco_images/000000066771.jpg new file mode 100644 index 0000000..9af2aba Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000066771.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000084477.jpg b/VAB-WebArena-Lite/coco_images/000000084477.jpg new file mode 100644 index 0000000..91bc6bc Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000084477.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000097278.jpg b/VAB-WebArena-Lite/coco_images/000000097278.jpg new file mode 100644 index 0000000..9a5a009 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000097278.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000102805.jpg b/VAB-WebArena-Lite/coco_images/000000102805.jpg new file mode 100644 index 0000000..0c3df96 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000102805.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000136772.jpg b/VAB-WebArena-Lite/coco_images/000000136772.jpg new file mode 100644 index 0000000..4ac0ea1 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000136772.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000161032.jpg b/VAB-WebArena-Lite/coco_images/000000161032.jpg new file mode 
100644 index 0000000..5e4b77d Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000161032.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000162415.jpg b/VAB-WebArena-Lite/coco_images/000000162415.jpg new file mode 100644 index 0000000..5200822 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000162415.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000163257.jpg b/VAB-WebArena-Lite/coco_images/000000163257.jpg new file mode 100644 index 0000000..5decfa6 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000163257.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000163682.jpg b/VAB-WebArena-Lite/coco_images/000000163682.jpg new file mode 100644 index 0000000..7efc9f2 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000163682.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000166768.jpg b/VAB-WebArena-Lite/coco_images/000000166768.jpg new file mode 100644 index 0000000..0d4790e Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000166768.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000167240.jpg b/VAB-WebArena-Lite/coco_images/000000167240.jpg new file mode 100644 index 0000000..f4533ec Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000167240.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000167353.jpg b/VAB-WebArena-Lite/coco_images/000000167353.jpg new file mode 100644 index 0000000..30536e9 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000167353.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000172595.jpg b/VAB-WebArena-Lite/coco_images/000000172595.jpg new file mode 100644 index 0000000..8d372fd Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000172595.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000173008.jpg b/VAB-WebArena-Lite/coco_images/000000173008.jpg new file mode 100644 index 0000000..b313679 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000173008.jpg 
differ diff --git a/VAB-WebArena-Lite/coco_images/000000175443.jpg b/VAB-WebArena-Lite/coco_images/000000175443.jpg new file mode 100644 index 0000000..51cabfb Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000175443.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000176232.jpg b/VAB-WebArena-Lite/coco_images/000000176232.jpg new file mode 100644 index 0000000..efba018 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000176232.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000286708.jpg b/VAB-WebArena-Lite/coco_images/000000286708.jpg new file mode 100644 index 0000000..b14d90b Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000286708.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000324258.jpg b/VAB-WebArena-Lite/coco_images/000000324258.jpg new file mode 100644 index 0000000..78e5e58 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000324258.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000324715.jpg b/VAB-WebArena-Lite/coco_images/000000324715.jpg new file mode 100644 index 0000000..0613917 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000324715.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000327601.jpg b/VAB-WebArena-Lite/coco_images/000000327601.jpg new file mode 100644 index 0000000..c4fcaad Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000327601.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000329041.jpg b/VAB-WebArena-Lite/coco_images/000000329041.jpg new file mode 100644 index 0000000..261cbf4 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000329041.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000348012.jpg b/VAB-WebArena-Lite/coco_images/000000348012.jpg new file mode 100644 index 0000000..13dc6cb Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000348012.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000399655.jpg 
b/VAB-WebArena-Lite/coco_images/000000399655.jpg new file mode 100644 index 0000000..43196a8 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000399655.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000415238.jpg b/VAB-WebArena-Lite/coco_images/000000415238.jpg new file mode 100644 index 0000000..05084f7 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000415238.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000433515.jpg b/VAB-WebArena-Lite/coco_images/000000433515.jpg new file mode 100644 index 0000000..ba792e9 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000433515.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000440507.jpg b/VAB-WebArena-Lite/coco_images/000000440507.jpg new file mode 100644 index 0000000..ebabc5b Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000440507.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000444275.jpg b/VAB-WebArena-Lite/coco_images/000000444275.jpg new file mode 100644 index 0000000..df00c51 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000444275.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000513484.jpg b/VAB-WebArena-Lite/coco_images/000000513484.jpg new file mode 100644 index 0000000..459f4f0 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000513484.jpg differ diff --git a/VAB-WebArena-Lite/coco_images/000000515982.jpg b/VAB-WebArena-Lite/coco_images/000000515982.jpg new file mode 100644 index 0000000..f1fc5c1 Binary files /dev/null and b/VAB-WebArena-Lite/coco_images/000000515982.jpg differ diff --git a/VAB-WebArena-Lite/config_files/vwa/test_classifieds.raw.json b/VAB-WebArena-Lite/config_files/vwa/test_classifieds.raw.json new file mode 100644 index 0000000..b35b2e9 --- /dev/null +++ b/VAB-WebArena-Lite/config_files/vwa/test_classifieds.raw.json @@ -0,0 +1,8453 @@ +[ + { + "sites": [ + "classifieds" + ], + "task_id": 0, + "require_login": true, + "storage_state": 
"./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me the {{attribute}} {{item}} on this site.", + "intent": "Find me the cheapest blue kayak on this site.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "blue kayak" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=4799", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 0 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 1, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me the {{attribute}} {{item}}. It should be between {{range}}.", + "intent": "Find me the cheapest red Toyota. It should be between $3000 to $6000.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "red Toyota", + "range": "$3000 to $6000" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=35838", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 1 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 2, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me the {{attribute}} {{item}}. It should be between {{range}}.", + "intent": "Find me the most recently posted item in the \"Jewelry\" category with a red gem. 
It should be between $300 to $600.", + "image": null, + "instantiation_dict": { + "attribute": "most recently posted", + "item": "item in the \"Jewelry\" category with a red gem", + "range": "$300 to $600" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=10066", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 1 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 3, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me the {{attribute}} {{item}}. It should be between {{range}}.", + "intent": "Find me the most recently posted item in the \"Photo + video\" category that is not completely black in color. It should be between $1000 to $1200.", + "image": null, + "instantiation_dict": { + "attribute": "most recently posted", + "item": "item in the \"Photo + video\" category that is not completely black in color", + "range": "$1000 to $1200" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=39343", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 1 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 4, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to my listing {{item}} and {{action}}.", + "intent": "Navigate to my listing of the white car and change the price to $25000. 
Update the price in the description as well.", + "image": null, + "instantiation_dict": { + "item": "of the white car", + "action": "change the price to $25000. Update the price in the description as well" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84144", + "locator": "func:get_query_text(__page__, '.price')", + "required_contents": { + "must_include": [ + "25000.00" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84144", + "locator": "func:get_query_text(__page__, '.desc')", + "required_contents": { + "must_include": [ + "$25000 |OR| $25,000" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84144", + "locator": "func:get_query_text(__page__, '.desc')", + "required_contents": { + "must_exclude": [ + "30000", + "30,000" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 2 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 5, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to my listing {{item}} and {{action}}.", + "intent": "Navigate to my listing of the white car and delete it.", + "image": null, + "instantiation_dict": { + "item": "of the white car", + "action": "delete it" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84144", + "locator": "", + "required_contents": { + "must_include": [ + "404" + ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 2 + }, + { + "sites": [ + 
"classifieds" + ], + "task_id": 6, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Return the links of the {{attribute}} {{items}}.", + "intent": "Return the links of the 3 most recent motorcycles within $1000 to $2000 that are not orange.", + "image": null, + "instantiation_dict": { + "attribute": "3 most recent", + "items": "motorcycles within $1000 to $2000 that are not orange" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "__CLASSIFIEDS__/index.php?page=item&id=4208", + "__CLASSIFIEDS__/index.php?page=item&id=65357", + "__CLASSIFIEDS__/index.php?page=item&id=11549" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 3 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 7, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Return the links of the {{attribute}} {{items}}.", + "intent": "Return the links of the 2 most recent items in the \"Cell phones\" category within $300 to $600 that are white in color.", + "image": null, + "instantiation_dict": { + "attribute": "2 most recent", + "items": "items in the \"Cell phones\" category within $300 to $600 that are white in color" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "__CLASSIFIEDS__/index.php?page=item&id=40404", + "__CLASSIFIEDS__/index.php?page=item&id=38125" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + 
"intent_template_id": 3 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 8, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Help me make a post selling this item and navigate to it. Price it at $10 cheaper than the most similar item on the site {{range}} (of the same color). If there are multiple similar items, use the most recent posting.", + "intent": "Help me make a post selling this item and navigate to it. Price it at $10 cheaper than the most similar item on the site between $250-300 (of the same color). If there are multiple similar items, use the most recent posting.", + "image": "https://images.pexels.com/photos/6869207/pexels-photo-6869207.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "range": "between $250-300" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "program_html": [ + { + "url": "last", + "locator": "func:get_query_text(__page__, '.price')", + "required_contents": { + "must_include": [ + "270.00" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".item-photos", + "eval_fuzzy_image_match": "https://images.pexels.com/photos/6869207/pexels-photo-6869207.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2" + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "Closest item: __CLASSIFIEDS__/index.php?page=item&id=81838 or __CLASSIFIEDS__/index.php?page=item&id=32109", + "intent_template_id": 4 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 9, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Help me make a post selling this item and navigate to it. 
Price it at $10 cheaper than the most similar item on the site {{range}} (of the same color). If there are multiple similar items, use the most recent posting.", + "intent": "Help me make a post selling this item and navigate to it. Price it at $10 cheaper than the most similar item on the site between $750-800 (of the same color). If there are multiple similar items, use the most recent posting.", + "image": "__CLASSIFIEDS__/oc-content/uploads/42906/42906.png", + "instantiation_dict": { + "range": "between $750-800" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "program_html": [ + { + "url": "last", + "locator": "func:get_query_text(__page__, '.price')", + "required_contents": { + "must_include": [ + "785.00" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".item-photos", + "eval_fuzzy_image_match": "__CLASSIFIEDS__/oc-content/uploads/42906/42906.png" + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "Closest item: __CLASSIFIEDS__/index.php?page=item&id=42906", + "intent_template_id": 4 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 10, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "What is the {{attribute}} of {{item}}?", + "intent": "What is the seat height in inches of the smaller piece of furniture on this page?", + "image": null, + "instantiation_dict": { + "attribute": "seat height in inches", + "item": "the smaller piece of furniture on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "21" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=43887", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": 
"" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 5 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 11, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=7", + "geolocation": null, + "intent_template": "What is the {{attribute}} of {{item}}?", + "intent": "What is the size of the wheels in inches of the first blue bike on this page?", + "image": null, + "instantiation_dict": { + "attribute": "size of the wheels in inches", + "item": "the first blue bike on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "26" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=66613", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 5 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 12, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "What is the {{attribute}} of {{item}}?", + "intent": "What is the color of most recently listed motorcycle?", + "image": null, + "instantiation_dict": { + "attribute": "color", + "item": "most recently listed motorcycle" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "red" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=9068", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 5 + }, + { + 
"sites": [ + "classifieds" + ], + "task_id": 13, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "What is the {{attribute}} of {{item}}?", + "intent": "What is the color of the most expensive item in the \"Boats\" category?", + "image": null, + "instantiation_dict": { + "attribute": "color", + "item": "the most expensive item in the \"Boats\" category" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "white" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=40932", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 5 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 14, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=4&iPage=2&sShowAs=gallery", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "What is the email of the seller of {{item}}?", + "intent": "What is the email of the seller of the painting in the second row?", + "image": null, + "instantiation_dict": { + "item": "the painting in the second row" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "olga.jones341@example.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 6 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 15, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": 
"__CLASSIFIEDS__/index.php?page=search&sCategory=21", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "What is the email of the seller of {{item}} on this page?", + "intent": "What is the email of the seller of the guitar in the red case on this page?", + "image": null, + "instantiation_dict": { + "item": "the guitar in the red case" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "haruto_abdullah497@example.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=67527", + "intent_template_id": 7 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 16, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=3", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "What is the email of the seller of {{item}} on this page?", + "intent": "What is the email of the seller of the item with the coffee mug in the picture on this page?", + "image": null, + "instantiation_dict": { + "item": "the item with the coffee mug in the picture" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "layla_garcia352@example.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=15810", + "intent_template_id": 7 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 17, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + 
"viewport_size": { + "width": 1280 + }, + "intent_template": "Show me the {{item}}.", + "intent": "Show me the cheapest bike with red handlebars between $900-950.", + "image": null, + "instantiation_dict": { + "item": "cheapest bike with red handlebars between $900-950" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=79747 |OR| __CLASSIFIEDS__/index.php?page=item&id=11376", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 8 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 18, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "Show me the {{item}}.", + "intent": "Show me the most recently posted blue iPhone.", + "image": null, + "instantiation_dict": { + "item": "most recently posted blue iPhone" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=38173", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 8 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 19, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "Show me the {{item}}.", + "intent": "Show me the most recently posted painting in the \"Arts + crafts\" category between $80-100 that shows the sea.", + "image": null, + "instantiation_dict": { + "item": "most recently posted painting in the 
\"Arts + crafts\" category between $80-100 that shows the sea" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=47487", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 8 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 20, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "Show me the {{item}}.", + "intent": "Show me the most recently posted white Xbox.", + "image": null, + "instantiation_dict": { + "item": "most recently posted white Xbox" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=49061", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 8 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 21, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "Show me the {{item}}.", + "intent": "Show me the newest \"RVs + campers\" listing that is in a dark color.", + "image": null, + "instantiation_dict": { + "item": "newest \"RVs + campers\" listing that is in a dark color" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=33164", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": 
"easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 8 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 22, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=10&sShowAs=gallery", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "How {{quantity}} does the {{item}} have?", + "intent": "How many miles does the red car in the second row have?", + "image": null, + "instantiation_dict": { + "quantity": "many miles", + "item": "red car in the second row" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "103K |OR| 103,000 |OR| 103000" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 9 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 23, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=10&sShowAs=gallery", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "How {{quantity}} does the {{item}} have?", + "intent": "How many miles does the second car in the third row have?", + "image": null, + "instantiation_dict": { + "quantity": "many miles", + "item": "second car in the third row" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "64K |OR| 64,000 |OR| 64000" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 9 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 24, + "require_login": 
true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=10&sShowAs=gallery&iPage=5", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "How {{quantity}} does the {{item}} have?", + "intent": "How many miles does the black truck on this page have?", + "image": null, + "instantiation_dict": { + "quantity": "many miles", + "item": "black truck on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "Miles not listed for this car." + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=83979 does not list miles", + "intent_template_id": 9 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 25, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "How many {{item}} {{criteria}}?", + "intent": "How many red boats were posted on 16th November 2023?", + "image": null, + "instantiation_dict": { + "item": "red boats", + "criteria": "were posted on 16th November 2023" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1 |OR| one" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=49894", + "intent_template_id": 10 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 26, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "How many {{item}} {{criteria}}?", + 
"intent": "How many yellow or blue motorcycles in total were posted on 25th October 2023?", + "image": null, + "instantiation_dict": { + "item": "yellow or blue motorcycles in total", + "criteria": "were posted on 25th October 2023" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2 |OR| two" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=74997 and __CLASSIFIEDS__/index.php?page=item&id=58261", + "intent_template_id": 10 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 27, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "How many {{item}} {{criteria}}?", + "intent": "How many of the listings in \"Rvs + campers\" posted on 16th November 2023 show the interior of the car?", + "image": null, + "instantiation_dict": { + "item": "of the listings in \"Rvs + campers\" posted on 16th November 2023", + "criteria": "show the interior of the car" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "3 |OR| three" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=32934 and __CLASSIFIEDS__/index.php?page=item&id=74707 and __CLASSIFIEDS__/index.php?page=item&id=32672", + "intent_template_id": 10 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 28, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Add a comment on the {{item}} with the 
title \"{{title}}\" and text \"{{comment}}\".", + "intent": "Add a comment on the most recently posted painting of a bridge with the title \"Question\" and text \"When did you acquire it?\".", + "image": null, + "instantiation_dict": { + "item": "most recently posted painting of a bridge", + "title": "Question", + "comment": "When did you acquire it?" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=12085", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Question by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=12085", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "When did you acquire it?" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=12085", + "intent_template_id": 11 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 29, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Add a comment on the {{item}} with the title \"{{title}}\" and text \"{{comment}}\".", + "intent": "Add a comment on the most recently posted red car with the title \"Nice car\" and text \"Is it still available?\".", + "image": null, + "instantiation_dict": { + "item": "most recently posted red car", + "title": "Nice car", + "comment": "Is it still available?" 
+ }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=66091", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Nice car by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=66091", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Is it still available?" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=66091", + "intent_template_id": 11 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 30, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Add a comment on the {{item}} with the title \"{{title}}\" and text \"{{comment}}\".", + "intent": "Add a comment on the most expensive black couch with the title \"Interesting Couch\" and text \"Is the price negotiable?\".", + "image": null, + "instantiation_dict": { + "item": "most expensive black couch", + "title": "Interesting Couch", + "comment": "Is the price negotiable?" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=44542", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Interesting Couch by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=44542", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Is the price negotiable?" 
+ ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=44542", + "intent_template_id": 11 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 31, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find {{item}} and post a comment offering $10 less than their asking price.", + "intent": "Find the latest listing of a white Google Pixel phone and post a comment offering $10 less than their asking price.", + "image": null, + "instantiation_dict": { + "item": "the latest listing of a white Google Pixel phone" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=47273", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "$250" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=47273", + "intent_template_id": 12 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 32, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find {{item}} and post a comment offering $10 less than their asking price.", + "intent": "Find this exact bike that's listed for $300-500 and post a comment offering $10 less than their asking price.", + "image": "__CLASSIFIEDS__/oc-content/uploads/9689/9689.png", + "instantiation_dict": { + "item": "this exact bike that's listed for $300-500" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ 
+ { + "url": "__CLASSIFIEDS__/index.php?page=item&id=9689", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "$385" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=9689", + "intent_template_id": 12 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 33, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find {{item}} and post a comment offering $10 less than their asking price.", + "intent": "Find the latest listing of a white fridge and post a comment offering $10 less than their asking price.", + "image": null, + "instantiation_dict": { + "item": "the latest listing of a white fridge" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=62633", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "$115" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=62633", + "intent_template_id": 12 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 34, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Search for \"{{phrase}}\"{{detail}} and return the links of {{target}}.", + "intent": "Search for \"painting\" and return the links of the most recent three items that feature animals.", + "image": null, + "instantiation_dict": { + "phrase": "painting", + "detail": "", + "target": "the most recent three items that feature animals" + }, + 
"require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "__CLASSIFIEDS__/index.php?page=item&id=14761", + "__CLASSIFIEDS__/index.php?page=item&id=32385", + "__CLASSIFIEDS__/index.php?page=item&id=47173" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 13 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 35, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Search for \"{{phrase}}\"{{detail}} and return the links of {{target}}.", + "intent": "Search for \"loveseat\" in the Furniture category and return the links of the most recent two items that are not black or gray.", + "image": null, + "instantiation_dict": { + "phrase": "loveseat", + "detail": " in the Furniture category", + "category": "Furniture", + "target": "the most recent two items that are not black or gray" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "__CLASSIFIEDS__/index.php?page=item&id=33441", + "__CLASSIFIEDS__/index.php?page=item&id=42263" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 13 + }, + { + "sites": [ + "classifieds", + "shopping" + ], + "task_id": 36, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__ |AND| __SHOPPING__", + "geolocation": null, + "intent_template": "Find all listings for this exact item on OSClass and compare its price to the cheapest listing on OneStopMarket. 
Return the link of {{target}}.", + "intent": "Find all listings for this exact item on OSClass and compare its price to the cheapest listing on OneStopMarket. Return the link of the cheaper item.", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07SK4W1VJ.0.jpg", + "instantiation_dict": { + "target": "the cheaper item" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "__SHOPPING__/luigi-s-mansion-3-nintendo-switch.html" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 14 + }, + { + "sites": [ + "classifieds", + "shopping" + ], + "task_id": 37, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__ |AND| __SHOPPING__", + "geolocation": null, + "intent_template": "Find all listings for this exact item on OSClass and compare its price to the cheapest listing on OneStopMarket. Return the link of {{target}}.", + "intent": "Find all listings for this exact item on OSClass and compare its price to the cheapest listing on OneStopMarket. 
Return the link of the cheapest item on both sites.", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07YZQ9QMD.0.jpg", + "instantiation_dict": { + "target": "the cheapest item on both sites" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "__SHOPPING__/nintendo-switch-w-neon-blue-neon-red-joy-con-mario-kart-8-deluxe-full-game-download-switch.html", + "__CLASSIFIEDS__/index.php?page=item&id=59435" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 14 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 38, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the three most recent {{item}} in the \"{{category}}\" category, and tell me {{attribute}}.", + "intent": "Find the three most recent grey or silver sedans in the \"Cars + trucks\" category, and tell me how much mileage each has.", + "image": null, + "instantiation_dict": { + "item": "grey or silver sedans", + "category": "Cars + trucks", + "attribute": "how much mileage each has" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "175k |OR| 175000 |OR| 175,000", + "153k |OR| 153000 |OR| 153,000", + "154k |OR| 154000 |OR| 154,000" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=60945 and __CLASSIFIEDS__/index.php?page=item&id=52305 and __CLASSIFIEDS__/index.php?page=item&id=50777", + "intent_template_id": 15 + }, + { + "sites": [ + "classifieds" + ], + 
"task_id": 39, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the three most recent {{item}} in the \"{{category}}\" category, and tell me {{attribute}}.", + "intent": "Find the three most recent bikes with predominantly red frames in the \"Bikes\" category, and tell me the size of their frames in inches.", + "image": null, + "instantiation_dict": { + "item": "bikes with predominantly red frames", + "category": "Bikes", + "attribute": "the size of their frames in inches" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "18", + "14", + "19" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=70291 and __CLASSIFIEDS__/index.php?page=item&id=76502 and __CLASSIFIEDS__/index.php?page=item&id=66548", + "intent_template_id": 15 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 40, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Search for \"{{phrase}}\" and tell me {{detail}}.", + "intent": "Search for \"dishwasher\" and tell me the brand of the most recent listing of a stainless steel one.", + "image": null, + "instantiation_dict": { + "phrase": "dishwasher", + "detail": "the brand of the most recent listing of a stainless steel one" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "LG" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": 
"__CLASSIFIEDS__/index.php?page=item&id=18607", + "intent_template_id": 16 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 41, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=8&sShowAs=gallery", + "geolocation": null, + "intent_template": "What is the price range of {{items}}?", + "intent": "What is the price range of the listings in the second row of this page?", + "image": null, + "instantiation_dict": { + "items": "the listings in the second row of this page" + }, + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1200", + "23750" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 17 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 42, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=12&sShowAs=gallery", + "geolocation": null, + "intent_template": "What is the price range of {{items}}?", + "intent": "What is the price range of the listings in the last two rows of this page?", + "image": null, + "instantiation_dict": { + "items": "the listings in the last two rows of this page" + }, + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "5", + "120" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 17 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 43, + "require_login": true, + "storage_state": 
"./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=10&sShowAs=gallery&iPage=4", + "geolocation": null, + "intent_template": "What is the price range of {{items}}?", + "intent": "What is the price range of all the red vehicles on this page?", + "image": null, + "instantiation_dict": { + "items": "all the red vehicles on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "7800", + "9999" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 17 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 44, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact item on the site, help me find the most recent post of it.", + "intent": "I recall seeing this exact item on the site, help me find the most recent post of it.", + "image": "__CLASSIFIEDS__/oc-content/uploads/26492/26492.png", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=26492", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 18 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 45, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact item on the site, help me find the most recent post of it.", + "intent": "I recall seeing this exact item on the site, help me find the most 
recent post of it.", + "image": "__CLASSIFIEDS__/oc-content/uploads/45196/45196.png", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=45196", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 18 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 46, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact item on the site, help me find the most recent post of it.", + "intent": "I recall seeing this exact item on the site, help me find the most recent post of it.", + "image": "__CLASSIFIEDS__/oc-content/uploads/66067/66067.png", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=66067", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 18 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 47, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact item on the site, help me find the most recent post of it. I recall {{hint}}.", + "intent": "I recall seeing this exact item on the site, help me find the most recent post of it. 
I recall that it was very expensive.", + "image": "__CLASSIFIEDS__/oc-content/uploads/7434/7434.png", + "instantiation_dict": { + "hint": "that it was very expensive" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=7434", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 19 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 48, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact item on the site, help me find the most recent post of it. I recall {{hint}}.", + "intent": "I recall seeing this exact item on the site, help me find the most recent post of it. I recall seeing it in either the Collectibles or Antiques section.", + "image": "__CLASSIFIEDS__/oc-content/uploads/20629/20629.png", + "instantiation_dict": { + "hint": "seeing it in either the Collectibles or Antiques section" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=20629", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 19 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 49, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=14", + "geolocation": null, + "intent_template": "How {{quantity}} does the {{item}} have?", + "intent": "How much RAM (in GB) does the item with blue LED lights on this page have?", + "image": null, + "instantiation_dict": { + 
"quantity": "much RAM (in GB)", + "item": "item with blue LED lights on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "64 |OR| 64GB" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=60306", + "intent_template_id": 9 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 50, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=6", + "geolocation": null, + "intent_template": "What is the email of the seller of {{item}}?", + "intent": "What is the email of the seller of the red palette on this page?", + "image": null, + "instantiation_dict": { + "item": "the red palette on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "sofia_kumar@example.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=65673", + "intent_template_id": 6 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 51, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me the most recent {{type}} in the \"{{category}}\" category of {{item}}.", + "intent": "Find me the most recent painting in the \"Arts + crafts\" category of a town that looks like this.", + "image": "https://images.pexels.com/photos/161815/santorini-oia-greece-water-161815.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "type": "painting", + "category": "Arts + 
crafts", + "item": "a town that looks like this" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=39336", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "6th page of the Arts + crafts section", + "intent_template_id": 20 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 52, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me the most recent {{type}} in the \"{{category}}\" category of {{item}}.", + "intent": "Find me the most recent painting in the \"Arts + crafts\" category of something that looks close to this.", + "image": "https://images.pexels.com/photos/1330219/pexels-photo-1330219.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "type": "painting", + "category": "Arts + crafts", + "item": "something that looks close to this" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=60133", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "2nd page of the Arts + crafts section", + "intent_template_id": 20 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 53, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Search for \"{{phrase}}\" and find the {{product}}.", + "intent": "Search for \"gnome\" and find the item that would make the best gift for someone who goes to this school.", + "image": 
"__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Pitt_Panthers_wordmark.svg.png.webp", + "instantiation_dict": { + "phrase": "gnome", + "product": "item that would make the best gift for someone who goes to this school" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=19511", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 21 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 54, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the most recently listed item in the \"{{category}}\" category that {{condition}}.", + "intent": "Find the most recently listed item in the \"Video gaming\" category that is related to this game I used to play.", + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Donkey_Kong_NES_Screenshot.png.webp", + "instantiation_dict": { + "category": "Video gaming", + "condition": "is related to this game I used to play" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=787", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 22 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 55, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the most recently listed item in the \"{{category}}\" category that {{condition}}.", + "intent": "Find the most recently listed item in the \"Collectibles\" category that depicts the 
inventors of this.", + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/First_flight2.jpg.webp", + "instantiation_dict": { + "category": "Collectibles", + "condition": "depicts the inventors of this" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=78765", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "Inventors are the Wright brothers", + "intent_template_id": 22 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 56, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{item}} that {{condition}}.", + "intent": "Find the cheapest snowblower that is not red in color.", + "image": null, + "instantiation_dict": { + "item": "cheapest snowblower", + "condition": "is not red in color" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=25929", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 23 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 57, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Add a comment on the {{item}} with the title \"{{title}}\" and text \"{{comment}}\".", + "intent": "Add a comment on the two most recently posted blue bikes between $100-150 with the title \"Interested in your bike\" and text \"Is it still available?\".", + "image": null, + "instantiation_dict": { + "item": "two most recently posted blue bikes between $100-150", + 
"title": "Interested in your bike", + "comment": "Is it still available?" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=66613", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Interested in your bike" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=66613", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Is it still available?" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=71313", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Interested in your bike" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=71313", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Is it still available?" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=66613 and __CLASSIFIEDS__/index.php?page=item&id=71313", + "intent_template_id": 11 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 58, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category of {{location}} and find me {{item}}.", + "intent": "Explore the \"Furniture\" category of Washington, D.C. 
and find me the most recent blue chair.", + "image": null, + "instantiation_dict": { + "category": "Furniture", + "location": "Washington, D.C.", + "item": "the most recent blue chair" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=37760", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 24 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 59, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}} with {{detail}} from the image {{location}}.", + "intent": "Find the cheapest video game item with the animal from the image on its display.", + "image": "https://images.pexels.com/photos/86405/penguin-funny-blue-water-86405.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "product": "cheapest video game item", + "detail": "the animal", + "location": "on its display" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=6247", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 25 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 60, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}} where {{detail}}.", + "intent": "Find the most expensive video game item where I can roleplay the situation in the image.", + "image": 
"https://images.pexels.com/photos/620335/pexels-photo-620335.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "product": "most expensive video game item", + "detail": "I can roleplay the situation in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=4188", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 26 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 61, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}} where {{detail}}.", + "intent": "Find the most expensive video game item where I can roleplay the situation in the image.", + "image": "https://images.pexels.com/photos/344034/pexels-photo-344034.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "product": "most expensive video game item", + "detail": "I can roleplay the situation in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=42756", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 26 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 62, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}} with {{detail}} from the image {{location}}.", + "intent": "Find the most expensive video game item with the character from the image on its display.", + "image": 
"https://images.pexels.com/photos/2854693/pexels-photo-2854693.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "product": "most expensive video game item", + "detail": "the character", + "location": "on its display" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=19137", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 25 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 63, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}} with {{detail}} from the image {{location}}.", + "intent": "Find the most expensive video game item with the character from the image on its display.", + "image": "https://images.pexels.com/photos/7768661/pexels-photo-7768661.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "product": "most expensive video game item", + "detail": "the character", + "location": "on its display" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=17379", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 25 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 64, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}} where {{detail}}.", + "intent": "Find the video game item that costs exactly $500 where I can roleplay the situation in the 
image.", + "image": "https://images.pexels.com/photos/620335/pexels-photo-620335.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "product": "video game item that costs exactly $500", + "detail": "I can roleplay the situation in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=73629", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 26 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 65, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}} where {{detail}}.", + "intent": "Find the cheapest video game item where I can roleplay the situation in the image.", + "image": "https://images.pexels.com/photos/274422/pexels-photo-274422.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "product": "cheapest video game item", + "detail": "I can roleplay the situation in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=28239", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 26 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 66, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}} where {{detail}}.", + "intent": "Find the cheapest video game item where I can roleplay the situation in the image.", + "image": 
"https://images.pexels.com/photos/718952/pexels-photo-718952.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "product": "cheapest video game item", + "detail": "I can roleplay the situation in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=6175", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 26 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 67, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}} where {{detail}}.", + "intent": "Find the cheapest video game item where I can roleplay the situation in the image.", + "image": "https://images.pexels.com/photos/2346/sport-high-united-states-of-america-ball.jpg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "product": "cheapest video game item", + "detail": "I can roleplay the situation in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=7114", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 26 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 68, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}} where {{detail}}.", + "intent": "Find the cheapest Tiger Woods video game item where the person on the cover has a red shirt.", + "image": null, + 
"instantiation_dict": { + "product": "cheapest Tiger Woods video game item", + "detail": "the person on the cover has a red shirt" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=2278", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 26 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 69, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}} which {{detail}}.", + "intent": "Find the most expensive item posted from Delaware which is related to the brand on the hat in the image.", + "image": "https://images.pexels.com/photos/16976200/pexels-photo-16976200/free-photo-of-girl-sitting-on-a-curb.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "product": "most expensive item posted from Delaware", + "detail": "is related to the brand on the hat in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=15047", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 27 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 70, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{item}} from {{location}} {{condition}}.", + "intent": "Find the most expensive tractor from Delaware that is green.", + "image": null, + "instantiation_dict": { + "item": "most expensive tractor", + "location": 
"Delaware", + "condition": "that is green" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=68860", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 28 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 71, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}} which {{detail}}.", + "intent": "Find the most expensive item posted from Delaware which is of the same item as in the image.", + "image": "https://images.pexels.com/photos/359995/pexels-photo-359995.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "product": "most expensive item posted from Delaware", + "detail": "is of the same item as in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=12399 |OR| __CLASSIFIEDS__/index.php?page=item&id=53168", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 27 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 72, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I like {{attribute}}. Can you find the {{detail}} where I can buy similar items, if there's one?", + "intent": "I like the products in the image. 
Can you find the cheapest posting from Delaware where I can buy similar items, if there's one?", + "image": "https://images.pexels.com/photos/5883534/pexels-photo-5883534.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "attribute": "the products in the image", + "detail": "cheapest posting from Delaware" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=43255", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 29 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 73, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me {{product}} that {{condition}} in the image.", + "intent": "Find me the cheapest posting from Delaware that has the same brand as the product in the image.", + "image": "https://images.pexels.com/photos/1858115/pexels-photo-1858115.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "condition": "has the same brand as the product", + "product": "the cheapest posting from Delaware" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=78325", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 30 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 74, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{item}} from {{location}} {{condition}}.", + "intent": 
"Find the most expensive truck from Ohio that is white.", + "image": null, + "instantiation_dict": { + "item": "most expensive truck", + "location": "Ohio", + "condition": "that is white" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=14818", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 28 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 75, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to my listing {{item}} and {{action}}.", + "intent": "Navigate to my listing of the white vase and change the price to $120 (including in the description).", + "image": null, + "instantiation_dict": { + "item": "of the white vase", + "action": "change the price to $120 (including in the description)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84148", + "locator": "func:get_query_text(__page__, '.price')", + "required_contents": { + "must_include": [ + "120.00" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84148", + "locator": "func:get_query_text(__page__, '.desc')", + "required_contents": { + "must_include": [ + "$120" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84148", + "locator": "func:get_query_text(__page__, '.desc')", + "required_contents": { + "must_exclude": [ + "$80" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 2 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 76, 
+ "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to my listing {{item}} and {{action}}.", + "intent": "Navigate to my listing of the blue bike and change the price to $85.50 (including in the description).", + "image": null, + "instantiation_dict": { + "item": "of the blue bike", + "action": "change the price to $85.50 (including in the description)" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84146", + "locator": "func:get_query_text(__page__, '.price')", + "required_contents": { + "must_include": [ + "85.50" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84146", + "locator": "func:get_query_text(__page__, '.desc')", + "required_contents": { + "must_include": [ + "$85.50" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84146", + "locator": "func:get_query_text(__page__, '.desc')", + "required_contents": { + "must_exclude": [ + "$250" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 2 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 77, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to my listing {{item}} and {{action}}.", + "intent": "Navigate to my listing of the all-black bike and change the price (including in the description) to one dollar more than the cheapest bike on the site (ignoring bikes that cost $1).", + "image": null, + "instantiation_dict": { + "item": "of the all-black bike", + "action": "change the price (including in the description) to one dollar more than the cheapest bike on the site (ignoring bikes 
that cost $1)" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84145", + "locator": "func:get_query_text(__page__, '.price')", + "required_contents": { + "must_include": [ + "6.00" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84145", + "locator": "func:get_query_text(__page__, '.desc')", + "required_contents": { + "must_include": [ + "$6" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84145", + "locator": "func:get_query_text(__page__, '.desc')", + "required_contents": { + "must_exclude": [ + "$300" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 2 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 78, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=9&sOrder=dt_pub_date&iOrderType=desc&iPage=4", + "geolocation": null, + "intent_template": "Find me {{product}} that {{condition}} in the image.", + "intent": "Find me an item on this page that references the city in the image.", + "image": "https://images.pexels.com/photos/12493794/pexels-photo-12493794.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "an item on this page", + "condition": "references the city" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=14342", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 30, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "classifieds" + ], + "task_id": 79, + "require_login": true, + 
"storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=9&sOrder=dt_pub_date&iOrderType=desc&iPage=4", + "geolocation": null, + "intent_template": "Find me {{product}} that {{condition}} in the image.", + "intent": "Find me an item on this page that references the person in the image.", + "image": "https://images.pexels.com/photos/1604849/pexels-photo-1604849.jpeg", + "instantiation_dict": { + "product": "an item on this page", + "condition": "references the person" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=44608", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 30, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "classifieds" + ], + "task_id": 80, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=9&iPage=4&sOrder=i_price&iOrderType=asc", + "geolocation": null, + "intent_template": "Tell me the {{detail}} that {{product}}?", + "intent": "Tell me the first names of the listers who posted the items on the page that do not include a book in its image?", + "image": null, + "instantiation_dict": { + "detail": "first names of the listers who posted the items on the page", + "product": "do not include a book in its image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Yuna", + "Siti" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=33101 and __CLASSIFIEDS__/index.php?page=item&id=45618", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + 
"comments": "", + "intent_template_id": 31 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 81, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the cheapest {{item}} {{condition}}.", + "intent": "Find the cheapest book with a hurricane on the cover.", + "image": null, + "instantiation_dict": { + "item": "book", + "condition": "with a hurricane on the cover" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=4727", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 32 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 82, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}}, and tell me how much it costs in dollars.", + "intent": "Find the most expensive purple hard-case book, and tell me how much it costs in dollars.", + "image": null, + "instantiation_dict": { + "product": "most expensive purple hard-case book" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "string_match" + ], + "reference_answers": { + "must_include": [ + "1100" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=44383", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 33 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 83, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": 
"__CLASSIFIEDS__/index.php?page=search&sCategory=19&sOrder=i_price&iOrderType=desc&iPage=2", + "geolocation": null, + "intent_template": "Tell me the {{detail}} that {{product}}?", + "intent": "Tell me the last name of the lister who posted the item on the page that does not include jewelry in its image?", + "image": null, + "instantiation_dict": { + "detail": "last name of the lister who posted the item on the page", + "product": "does not include jewelry in its image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Nowak" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=6407", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 31 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 84, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}}, and tell me how much it costs in dollars.", + "intent": "Find the most expensive ring that has an image taken as a selfie, and tell me how much it costs in dollars.", + "image": null, + "instantiation_dict": { + "product": "most expensive ring that has an image taken as a selfie" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "string_match" + ], + "reference_answers": { + "must_include": [ + "6000" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=43966", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 33 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 85, + "require_login": true, + "storage_state": 
"./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}}, and tell me how much it costs in dollars.", + "intent": "Find the most expensive ring that does not show the physical ring in the image, and tell me how much it costs in dollars.", + "image": null, + "instantiation_dict": { + "product": "most expensive ring that does not show the physical ring in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "string_match" + ], + "reference_answers": { + "must_include": [ + "3200" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=57020", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 33 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 86, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the cheapest {{item}} {{condition}}.", + "intent": "Find the cheapest Toyota that is the same color as the cheaper Toyota in my listings.", + "image": null, + "instantiation_dict": { + "item": "Toyota", + "condition": "that is the same color as the cheaper Toyota in my listings" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=72103", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 32 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 87, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=15&sOrder=i_price&iOrderType=desc&iPage=5", + 
"geolocation": null, + "intent_template": "Find the {{item}} on this page {{condition}}.", + "intent": "Find the most similar item on this page as shown in this image.", + "image": "https://images.pexels.com/photos/3051966/pexels-photo-3051966.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "item": "most similar item", + "condition": "as shown in this image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=34463", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 34 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 88, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{item}} from {{location}} {{condition}}.", + "intent": "Find the most expensive red vehicle in the \"Cars + trucks\" category from Virginia with trees in the background of its image.", + "image": null, + "instantiation_dict": { + "item": "most expensive red vehicle in the \"Cars + trucks\" category", + "location": "Virginia", + "condition": "with trees in the background of its image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=50736", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 28 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 89, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{item}} from 
{{location}} {{condition}}.", + "intent": "Find the most expensive car from Virginia that is neon green.", + "image": null, + "instantiation_dict": { + "item": "most expensive car", + "location": "Virginia", + "condition": "that is neon green" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=63326", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 28 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 90, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sRegion=9254928&sOrder=i_price&iOrderType=asc&sCategory=10&iPage=2", + "geolocation": null, + "intent_template": "Find the {{item}} on this page {{condition}}.", + "intent": "Find the car on this page that has the most discoloring.", + "image": null, + "instantiation_dict": { + "item": "car", + "condition": "that has the most discoloring" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=52649", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 34 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 91, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the cheapest {{item}} {{condition}}.", + "intent": "Find the cheapest car listing in the \"Cars + trucks\" category from Ohio that has an image taken from inside the car.", + "image": null, + "instantiation_dict": { + "item": "car listing in the \"Cars + 
trucks\" category from Ohio", + "condition": "that has an image taken from inside the car" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=81402", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 32 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 92, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{item}} from {{location}} {{condition}}.", + "intent": "Find the most expensive TV from Maryland that displays an ongoing NFL game.", + "image": null, + "instantiation_dict": { + "item": "most expensive TV", + "location": "Maryland", + "condition": "that displays an ongoing NFL game" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=23785", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 28 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 93, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sRegion=7361885&sCategory=15&sOrder=i_price&iOrderType=asc&iPage=4", + "geolocation": null, + "intent_template": "Find the {{product}} which {{detail}}.", + "intent": "Find the electronics item on the page which is compatible with this image.", + "image": "https://images.pexels.com/photos/1334597/pexels-photo-1334597.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "electronics item on the page", + "detail": "is 
compatible with this image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=42923", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 27 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 94, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=16", + "geolocation": null, + "intent_template": "Find the {{item}} on this page {{condition}}.", + "intent": "Find the animal on this page that has someone riding it in the image.", + "image": null, + "instantiation_dict": { + "item": "animal", + "condition": "that has someone riding it in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=79622", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 34 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 95, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{item}} with {{attribute}}. Add a {{rating}} rating with title \"{{title}}\" and text \"{{text}}\".", + "intent": "Find the most expensive boat with a listing image showing it on water. Add a 5 star rating with title \"Awesome boat\" and text \"Mine is similar!\".", + "image": null, + "instantiation_dict": { + "item": "most expensive boat", + "attribute": "a listing image showing it on water", + "rating": "5 star", + "title": "Awesome boat", + "text": "Mine is similar!" 
+ }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=40932", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Awesome boat by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=40932", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Mine is similar!", + "5 of 5" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 35 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 96, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=3&sOrder=i_price&iOrderType=asc&iPage=12", + "geolocation": null, + "intent_template": "Find the {{product}}, and tell me how much it costs in dollars.", + "intent": "Find the most similar item on this page as given in the image, and tell me how much it costs in dollars.", + "image": "https://images.pexels.com/photos/4107286/pexels-photo-4107286.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "most similar item on this page as given in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "string_match" + ], + "reference_answers": { + "must_include": [ + "14" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=5939", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 33 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 97, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + 
"start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category, and find the {{item}}.", + "intent": "Explore the \"Households\" category, and find the most recent item designed in the shape of an animal.", + "image": null, + "instantiation_dict": { + "category": "Households", + "item": "most recent item designed in the shape of an animal" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=57906", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 36 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 98, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "How many {{attribute}} are {{listing detail}}?", + "intent": "How many hours are on the engine of the most recently listed red boat?", + "image": null, + "instantiation_dict": { + "attribute": "hours", + "listing detail": "on the engine of the most recently listed red boat" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "80 |OR| eighty" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 37 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 99, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}}, and tell me how much it costs in dollars.", + "intent": "Find the most expensive wheelchair lift that has multiple people in the image 
of the listing, and tell me how much it costs in dollars.", + "image": null, + "instantiation_dict": { + "product": "most expensive wheelchair lift that has multiple people in the image of the listing" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "string_match" + ], + "reference_answers": { + "must_include": [ + "2995" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=9830", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 33 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 100, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "What is {{attribute}} on the {{product}}?", + "intent": "What is the number shown on the image of the most expensive race car in the Arts + crafts section?", + "image": null, + "instantiation_dict": { + "attribute": "the number shown", + "product": "image of the most expensive race car in the Arts + crafts section" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "61 |OR| sixty one" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=2354", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 38 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 101, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category, and find the {{item}} in this image.", + "intent": "Explore the \"Art + crafts\" category, and find the most expensive painting of the city in this image.", + "image": 
"https://images.pexels.com/photos/12493795/pexels-photo-12493795.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "category": "Art + crafts", + "item": "most expensive painting of the city" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=83533", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 39 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 102, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Tell me the {{product}}.", + "intent": "Tell me the height in inches of the most recently listed item in Art + crafts section portraying the person in the image.", + "image": "https://images.pexels.com/photos/767276/pexels-photo-767276.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "height in inches of the most recently listed item in Art + crafts section portraying the person in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "8 1/2 |OR| eight and a half |OR| 8.5 |OR| eight point five" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 40 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 103, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category, and find the {{item}} in this image.", + "intent": "Explore the \"Arts + crafts\" 
category, and find the most recently listed item portraying the person in this image.", + "image": "https://images.pexels.com/photos/37072/abraham-lincoln-lincoln-memorial-washington-dc-lincoln.jpg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "category": "Arts + crafts", + "item": "most recently listed item portraying the person" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=77577", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 39 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 104, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{item}} with {{attribute}}. Add a {{rating}} rating with title \"{{title}}\" and text \"{{text}}\".", + "intent": "Find the most recently listed RV with an iPhone screenshot as its image. Add a 3 star rating with title \"Any other pics?\" and text \"Do you have a higher quality picture?\".", + "image": null, + "instantiation_dict": { + "item": "most recently listed RV", + "attribute": "an iPhone screenshot as its image", + "rating": "3 star", + "title": "Any other pics?", + "text": "Do you have a higher quality picture?" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=21206", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Any other pics? 
by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=21206", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Do you have a higher quality picture?", + "3 of 5" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 35 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 105, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the cheapest {{item}} {{condition}}.", + "intent": "Find the cheapest guitar with a red velvet lined case.", + "image": null, + "instantiation_dict": { + "item": "guitar", + "condition": "with a red velvet lined case" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=23225", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 32 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 106, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "What is the email of the seller of {{item}}?", + "intent": "What is the email of the seller of the most expensive item in Photo + videos that has an animal for its image?", + "image": null, + "instantiation_dict": { + "item": "the most expensive item in Photo + videos that has an animal for its image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "miguel_ito@example.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + 
"reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 6 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 107, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Tell me the {{product}}.", + "intent": "Tell me the color of the rims on the most expensive bike in West Virginia.", + "image": null, + "instantiation_dict": { + "product": "color of the rims on the most expensive bike in West Virginia" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "purple", + "blue |OR| cyan" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=52440", + "intent_template_id": 40 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 108, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Tell me the {{product}}.", + "intent": "Tell me the name of the lister with the most expensive green vehicle from West Virginia.", + "image": null, + "instantiation_dict": { + "product": "name of the lister with the most expensive green vehicle from West Virginia" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Daiki Lee" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=7438", + "intent_template_id": 40 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 109, + 
"require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "How {{quantity}} does the {{item}} have?", + "intent": "How many miles does the most expensive white sports car in the \"Cars + trucks\" category from Ohio have?", + "image": null, + "instantiation_dict": { + "quantity": "many miles", + "item": "most expensive white sports car in the \"Cars + trucks\" category from Ohio" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "36K |OR| 36,000 |OR| 36000" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=48555", + "intent_template_id": 9 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 110, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Search for \"{{phrase}}\" and tell me {{detail}}.", + "intent": "Search for \"mario kart\" and tell me how many games are in the image of the most recently listed item that costs at least $300.", + "image": null, + "instantiation_dict": { + "phrase": "mario kart", + "detail": "how many games are in the image of the most recently listed item that costs at least $300" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0 |OR| zero" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=34406", + "intent_template_id": 16 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 111, + "require_login": true, + 
"storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Search for \"{{phrase}}\" and tell me {{detail}}.", + "intent": "Search for \"hockey\" and tell me the team name of the jersey on the most recently listed item.", + "image": null, + "instantiation_dict": { + "phrase": "hockey", + "detail": "the team name of the jersey on the most recently listed item" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Kings" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=79079", + "intent_template_id": 16 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 112, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Search for \"{{phrase}}\" and navigate to the {{product}} that {{condition}} in its image.", + "intent": "Search for \"basketball\" and navigate to the cheapest item that has a man in a suit in its image.", + "image": null, + "instantiation_dict": { + "phrase": "basketball", + "product": "cheapest item", + "condition": "has a man in a suit" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=68564", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 41 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 113, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Search for 
\"{{phrase}}\" and navigate to the {{product}}.", + "intent": "Search for \"football\" and navigate to the most expensive item whose image has a player wearing number 16.", + "image": null, + "instantiation_dict": { + "phrase": "football", + "product": "most expensive item whose image has a player wearing number 16" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=30630", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 42 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 114, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{item}} from {{location}} {{condition}}.", + "intent": "Navigate to the oldest listing from Virginia which contains multiple people in its image.", + "image": null, + "instantiation_dict": { + "item": "oldest listing", + "location": "Virginia", + "condition": "which contains multiple people in its image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=29164", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 43 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 115, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Search for \"{{phrase}}\" and navigate to the {{product}} that matches the body part in this image.", + "intent": "Search for \"brace\" and navigate to the most recently listed item 
that matches the body part in this image.", + "image": "https://images.pexels.com/photos/356175/pexels-photo-356175.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "phrase": "brace", + "product": "most recently listed item" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=68133", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 44 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 116, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Search for \"{{phrase}}\" and navigate to the {{product}} that matches the body part in this image.", + "intent": "Search for \"brace\" and navigate to the cheapest item that matches the body part in this image.", + "image": "https://images.pexels.com/photos/2077959/pexels-photo-2077959.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "phrase": "brace", + "product": "cheapest item" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=42654", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 44 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 117, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the cheapest {{item}} {{condition}}.", + "intent": "Find the cheapest bike which is of a similar color as the person's outfit.", + "image": 
"https://images.pexels.com/photos/14656911/pexels-photo-14656911.jpeg?auto=compress&cs=tinysrgb&w=800", + "instantiation_dict": { + "item": "bike", + "condition": "which is of a similar color as the person's outfit" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=18534", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 32 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 118, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=11&sOrder=dt_pub_date&iOrderType=desc&sShowAs=gallery", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "What is {{attribute}} on the {{product}}?", + "intent": "What is the time on the phone with the visible screen in the third row on this page?", + "image": null, + "instantiation_dict": { + "attribute": "the time", + "product": "phone with the visible screen in the third row on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "3:03 |OR| 303" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=69495", + "intent_template_id": 38 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 119, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=11&sOrder=i_price&iOrderType=desc&sShowAs=gallery", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "What is the 
{{attribute}} of {{item}}?", + "intent": "What is the denomination of the dollar bill shown in that image on this page?", + "image": null, + "instantiation_dict": { + "attribute": "denomination", + "item": "the dollar bill shown in that image on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "50 |OR| fifty" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=75385", + "intent_template_id": 5 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 120, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=7&sShowAs=gallery&sOrder=i_price&iOrderType=desc", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "What is {{attribute}} on the {{product}}?", + "intent": "What is written on the down tube of the bike with the purple frame on this page?", + "image": null, + "instantiation_dict": { + "attribute": "written", + "product": "down tube of the bike with the purple frame on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "canyon" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=38276", + "intent_template_id": 38 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 121, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=11&sOrder=i_price&iOrderType=asc&sShowAs=gallery", + "geolocation": null, + "intent_template":
"Navigate to the {{item}} on this page {{condition}}.", + "intent": "Navigate to the item on this page whose image consists solely of text.", + "image": null, + "instantiation_dict": { + "item": "item", + "condition": "whose image consists solely of text" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=15619", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 45 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 122, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}}, and tell me how much it costs in dollars.", + "intent": "Find the oldest listed red book in the \"Books\" category, and tell me how much it costs in dollars.", + "image": null, + "instantiation_dict": { + "product": "oldest listed red book in the \"Books\" category" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "string_match" + ], + "reference_answers": { + "must_include": [ + "3" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=69456", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 33 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 123, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sPattern=clothes&sOrder=i_price&iOrderType=desc&sShowAs=gallery", + "geolocation": null, + "intent_template": "Navigate to the {{item}} on this page {{condition}}.", + "intent": "Navigate to the item on this page which matches the color of the object 
being used by the person in the given image.", + "image": "https://images.pexels.com/photos/13991925/pexels-photo-13991925.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "item": "item", + "condition": "which matches the color of the object being used by the person in the given image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=48074", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 45 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 124, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=17&sShowAs=gallery", + "geolocation": null, + "intent_template": "Navigate to the item on this page whose image {{condition}}.", + "intent": "Navigate to the item on this page whose image is set on grass.", + "image": null, + "instantiation_dict": { + "condition": "is set on grass" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=10702", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 46 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 125, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=17&sShowAs=gallery", + "geolocation": null, + "intent_template": "Navigate to the item on this page whose image {{condition}}.", + "intent": "Navigate to the item on this page whose image is a desktop screenshot.", + "image": null, + 
"instantiation_dict": { + "condition": "is a desktop screenshot" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=32204", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 46 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 126, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me {{product}} that {{condition}} in the image.", + "intent": "Find me the most recently listed item in the \"Books\" category that references the person in the image.", + "image": "https://images.pexels.com/photos/3592123/pexels-photo-3592123.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "the most recently listed item in the \"Books\" category", + "condition": "references the person" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=64659", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 30, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "classifieds" + ], + "task_id": 127, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Search for \"{{phrase}}\" and navigate to the {{product}}.", + "intent": "Search for \"MCAT\" and navigate to the prep book that has 2020-2021 on the cover.", + "image": null, + "instantiation_dict": { + "phrase": "MCAT", + "product": "prep book that has 2020-2021 on the cover" + }, + "require_reset": false, + "eval": { + 
"eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=5802", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 42 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 128, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=item&id=74603", + "geolocation": null, + "intent_template": "Tell me the {{product}}.", + "intent": "Tell me the jersey numbers (in digits) in the image of this listing.", + "image": null, + "instantiation_dict": { + "product": "jersey numbers (in digits) in the image of this listing" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "99", + "13", + "80", + "92" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 40 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 129, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sOrder=dt_pub_date&iOrderType=desc&sPattern=soccer+cleats&sShowAs=gallery", + "geolocation": null, + "intent_template": "Tell me the {{product}}.", + "intent": "Tell me the last name of the seller whose listing image on the page shows the price.", + "image": null, + "instantiation_dict": { + "product": "last name of the seller whose listing image on the page shows the price" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Chatterjee" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": 
"easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 40 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 130, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sOrder=dt_pub_date&iOrderType=desc&sPattern=banana+boat&sShowAs=gallery", + "geolocation": null, + "intent_template": "Navigate to the item on this page whose image {{condition}}.", + "intent": "Navigate to the item on this page whose image is taken during a sunset.", + "image": null, + "instantiation_dict": { + "condition": "is taken during a sunset" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=19604", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 46 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 131, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Search for \"{{phrase}}\" and navigate to the {{product}}.", + "intent": "Search for \"dogs\" and navigate to the listing whose image consists of puppies in a basket.", + "image": null, + "instantiation_dict": { + "phrase": "dogs", + "product": "listing whose image consists of puppies in a basket" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=50134", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 42 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 132, + 
"require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sOrder=dt_pub_date&iOrderType=desc&sPattern=dogs&sShowAs=gallery&iPage=3", + "geolocation": null, + "intent_template": "Navigate to the item on this page whose image {{condition}}.", + "intent": "Navigate to the item on this page whose image has 3 black animals.", + "image": null, + "instantiation_dict": { + "condition": "has 3 black animals" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=69201", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 46 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 133, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{item}} with {{attribute}}. Add a {{rating}} rating with title \"{{title}}\" and text \"{{text}}\".", + "intent": "Find the oldest listing of Xbox One games in the \"Video gaming\" category with more than 9 games in the listing image. Add a 4 star rating with title \"Nice collection\" and text \"Any other games?\".", + "image": null, + "instantiation_dict": { + "item": "oldest listing of Xbox One games in the \"Video gaming\" category", + "attribute": "more than 9 games in the listing image", + "rating": "4 star", + "title": "Nice collection", + "text": "Any other games?" 
+ }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=67441", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Nice collection by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=67441", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Any other games?", + "4 of 5" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 35 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 134, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sOrder=dt_pub_date&iOrderType=desc&sPattern=+Xbox+One+games&sCategory=24&iPage=5&sShowAs=gallery", + "geolocation": null, + "intent_template": "Navigate to the {{product}} which {{condition}} in this image.", + "intent": "Navigate to the listing on this page whose image includes a jersey which is the same color as the item in this image.", + "image": "https://images.pexels.com/photos/36753/flower-purple-lical-blosso.jpg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "listing on this page whose image includes a jersey", + "condition": "is the same color as the item" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=42088", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 47 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 135, + 
"require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sOrder=dt_pub_date&iOrderType=desc&sPattern=hood&sShowAs=gallery", + "geolocation": null, + "intent_template": "Navigate to the {{product}} on this page {{condition}}.", + "intent": "Navigate to the listing on this page with an item of the exact same color as the one in this image.", + "image": "https://images.pexels.com/photos/18756727/pexels-photo-18756727/free-photo-of-mistry-of-the-hot-pink-mclaren.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "listing", + "condition": "with an item of the exact same color as the one in this image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no pink items on the page.", + "reference_answer_raw_annotation": "N/A" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 48 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 136, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact item on the site, tell me the email of the lister of the item. I recall {{hint}}.", + "intent": "I recall seeing this exact item on the site, tell me the email of the lister of the item. 
I recall that it was pretty cheap.", + "image": "__CLASSIFIEDS__/oc-content/uploads/37999/37999.png", + "instantiation_dict": { + "hint": "that it was pretty cheap" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "noah_popov126@example.com" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=37999", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 49 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 137, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact item on the site, tell me the email of the most recent lister of the item. I recall {{hint}}.", + "intent": "I recall seeing this exact item on the site, tell me the email of the most recent lister of the item. I recall that it cost $20.", + "image": "__CLASSIFIEDS__/oc-content/uploads/49213/49213.png", + "instantiation_dict": { + "hint": "that it cost $20" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "mia_robinson403@example.com" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=19390", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 50 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 138, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact item on the site, tell me the email of the most recent lister of the item. 
I recall {{hint}}.", + "intent": "I recall seeing this exact item on the site, tell me the email of the most recent lister of the item. I recall it was in the Art + crafts section and between $600-$1000.", + "image": "__CLASSIFIEDS__/oc-content/uploads/70697/70697.png", + "instantiation_dict": { + "hint": "it was in the Art + crafts section and between $600-$1000" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "sarah.sidorov92@example.com" + ] + }, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=70697", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 50 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 139, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact item on the site, help me find the most recent post of it. I recall {{hint}}.", + "intent": "I recall seeing this exact item on the site, help me find the most recent post of it. 
I recall it was in the Antiques section.", + "image": "__CLASSIFIEDS__/oc-content/uploads/47595/47595.png", + "instantiation_dict": { + "hint": "it was in the Antiques section" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=47595", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 19 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 140, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact item on the site, tell me the email of the most recent lister of the item. I recall {{hint}}.", + "intent": "I recall seeing this exact item on the site, tell me the email of the most recent lister of the item. I recall it was a Nascar jacket in the Collectibles section.", + "image": "__CLASSIFIEDS__/oc-content/uploads/3984/3984.png", + "instantiation_dict": { + "hint": "it was a Nascar jacket in the Collectibles section" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "juan_ortiz@example.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=3984", + "intent_template_id": 50 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 141, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact item on the site, help me find the most recent post of it. 
I recall {{hint}}.", + "intent": "I recall seeing this exact item on the site, help me find the most recent post of it. I recall it was in the Farm + garden section.", + "image": "__CLASSIFIEDS__/oc-content/uploads/41568/41568.png", + "instantiation_dict": { + "hint": "it was in the Farm + garden section" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=41568", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 19 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 142, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact item for under $20 on the site, tell me the state where this listing is located.", + "intent": "I recall seeing this exact item for under $20 on the site, tell me the state where this listing is located.", + "image": "__CLASSIFIEDS__/oc-content/uploads/22310/22310.png", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Pennsylvania" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=22310", + "intent_template_id": 51 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 143, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact {{item}} on the site, add a comment on its listing with the title \"{{title}}\" and text \"{{comment}}\".", + "intent": "I recall 
seeing this exact item for under $15 in the Arts+ crafts section on the site, add a comment on its listing with the title \"Inquiry\" and text \"I need this!\".", + "image": "__CLASSIFIEDS__/oc-content/uploads/25810/25810.png", + "instantiation_dict": { + "item": "item for under $15 in the Arts+ crafts section", + "title": "Inquiry", + "comment": "I need this!" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=25810", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Inquiry by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=25810", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "I need this!" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=25810", + "intent_template_id": 52 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 144, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact {{item}} on the site, add a comment on its listing with the title \"{{title}}\" and text \"{{comment}}\".", + "intent": "I recall seeing this exact item of pillows in the Household section on the site, add a comment on its listing with the title \"Commentary\" and text \"How funky...\".", + "image": "__CLASSIFIEDS__/oc-content/uploads/41127/41127.png", + "instantiation_dict": { + "item": "item of pillows in the Household section", + "title": "Commentary", + "comment": "How funky..." 
+ }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=41127", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Commentary by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=41127", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "How funky..." + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=41127", + "intent_template_id": 52 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 145, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "I recall seeing this exact {{item}} on the site, add a comment on its listing with the title \"{{title}}\" and text \"{{comment}}\".", + "intent": "I recall seeing this exact expensive item in the Video Gaming category on the site, add a comment on its listing with the title \"Urgent\" and text \"Need this for my bowling alley.\".", + "image": "__CLASSIFIEDS__/oc-content/uploads/43992/43992.png", + "instantiation_dict": { + "item": "expensive item in the Video Gaming category", + "title": "Urgent", + "comment": "Need this for my bowling alley." 
+ }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=43992", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Urgent by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=43992", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Need this for my bowling alley." + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=43992", + "intent_template_id": 52 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 146, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=9&sOrder=i_price&iOrderType=asc&iPage=124&sShowAs=gallery", + "geolocation": null, + "intent_template": "Navigate to the {{product}} on this page {{condition}}.", + "intent": "Navigate to the item on this page whose image has exactly 4 books.", + "image": null, + "instantiation_dict": { + "product": "item", + "condition": "whose image has exactly 4 books" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=66304", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 48 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 147, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=24&sOrder=i_price&iOrderType=desc&iPage=14&sShowAs=gallery", + "geolocation": null, 
+ "intent_template": "Navigate to the {{product}} on this page {{condition}}.", + "intent": "Navigate to the item on this page whose image has multiple game consoles from different brands.", + "image": null, + "instantiation_dict": { + "product": "item", + "condition": "whose image has multiple game consoles from different brands" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=35394", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 48 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 148, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=24&sOrder=i_price&iOrderType=desc&iPage=17&sShowAs=gallery", + "geolocation": null, + "intent_template": "For the {{item}}, tell me the {{detail}}.", + "intent": "For the item with the Amazon webpage in its image, tell me the number of ratings the item on the page has.", + "image": null, + "instantiation_dict": { + "item": "item with the Amazon webpage in its image", + "detail": "number of ratings the item on the page has" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "128 |OR| hundred twenty eight |OR| one hundred twenty eight" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=33746", + "intent_template_id": 53 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 149, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": 
"__CLASSIFIEDS__/index.php?page=search&sCategory=24&sOrder=i_price&iOrderType=desc&iPage=22&sShowAs=gallery", + "geolocation": null, + "intent_template": "For the {{item}}, tell me the {{detail}}.", + "intent": "For the item with a gold colored controller, tell me the number of games displayed in the image.", + "image": null, + "instantiation_dict": { + "item": "item with a gold colored controller", + "detail": "number of games displayed in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "12 |OR| twelve" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=53116", + "intent_template_id": 53 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 150, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=16&sOrder=i_price&iOrderType=asc&iPage=331&sShowAs=gallery", + "geolocation": null, + "intent_template": "For the {{item}}, tell me the {{detail}}.", + "intent": "For the item with the Amazon webpage in its image, tell me the price in dollars displayed on the Amazon site.", + "image": null, + "instantiation_dict": { + "item": "item with the Amazon webpage in its image", + "detail": "price in dollars displayed on the Amazon site" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "249 |OR| two hundred forty nine" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=9246", + "intent_template_id": 53 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 151, +
"require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=9&iPage=2&sShowAs=gallery", + "geolocation": null, + "intent_template": "Navigate to the item on this page whose image {{condition}}.", + "intent": "Navigate to the item on this page whose image has pink sticky notes.", + "image": null, + "instantiation_dict": { + "condition": "has pink sticky notes" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=22560", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 46 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 152, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=22&sShowAs=gallery&iPage=11", + "geolocation": null, + "intent_template": "Navigate to the item on this page whose image {{condition}}.", + "intent": "Navigate to the item on this page whose image includes a human hand.", + "image": null, + "instantiation_dict": { + "condition": "includes a human hand" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=81346", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 46 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 153, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=4&sShowAs=gallery", + "geolocation": null, + "intent_template": "Navigate to the {{item}} on this page 
{{condition}}.", + "intent": "Navigate to the item on this page that most similarly depicts the setting in the image.", + "image": "https://images.pexels.com/photos/9344514/pexels-photo-9344514.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "item": "item", + "condition": "that most similarly depicts the setting in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=14761", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 45 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 154, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category, and find the {{item}}.", + "intent": "Explore the \"Furniture\" category, and find the oldest listed item whose image is taken outside.", + "image": null, + "instantiation_dict": { + "category": "Furniture", + "item": "oldest listed item whose image is taken outside" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=67923", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 36 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 155, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category, and find the {{item}}.", + "intent": "Explore the \"Computers\" category, and find the oldest listed item whose image 
shows a Microsoft Windows background.", + "image": null, + "instantiation_dict": { + "category": "Computers", + "item": "oldest listed item whose image shows a Microsoft Windows background" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=14744", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 36 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 156, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category, and find the {{item}}.", + "intent": "Explore the \"Bikes\" category, and find the oldest listed item whose image is taken on carpet.", + "image": null, + "instantiation_dict": { + "category": "Bikes", + "item": "oldest listed item whose image is taken on carpet" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=6429", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 36 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 157, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category, and find the {{item}}.", + "intent": "Explore the \"Music instruments\" category, and find the oldest listed item whose image shows an instruction manual.", + "image": null, + "instantiation_dict": { + "category": "Music instruments", + "item": "oldest listed item whose image shows an 
instruction manual" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=33917", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 36 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 158, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category, and find the {{item}}.", + "intent": "Explore the \"Jewelry\" category, and find the oldest listed necklace whose image shows the necklace being worn by a human.", + "image": null, + "instantiation_dict": { + "category": "Jewelry", + "item": "oldest listed necklace whose image shows the necklace being worn by a human" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=69302", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 36 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 159, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to my listing {{item}} and {{action}}.", + "intent": "Navigate to my listing with a rug in the image and add to the description that there are no refunds.", + "image": null, + "instantiation_dict": { + "item": "with a rug in the image", + "action": "add to the description that there are no refunds" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": 
"", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=84154", + "locator": "func:get_query_text(__page__, '.desc')", + "required_contents": { + "must_include": [ + "no refund |OR| no refunds |OR| 0 refund |OR| zero refunds" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 2 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 160, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=5&sShowAs=gallery", + "geolocation": null, + "intent_template": "Add a comment on the {{item}} with the title \"{{title}}\" and text \"{{comment}}\".", + "intent": "Add a comment on the item on this page whose image is taken inside a garage with the title \"More Pictures\" and text \"Could you post different angles?\".", + "image": null, + "instantiation_dict": { + "item": "item on this page whose image is taken inside a garage", + "title": "More Pictures", + "comment": "Could you post different angles?" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=13412", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "More Pictures by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=13412", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Could you post different angles?" 
+ ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=13412", + "intent_template_id": 11 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 161, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/", + "geolocation": null, + "intent_template": "Search for \"{{phrase}}\" and tell me {{detail}}.", + "intent": "Search for \"mountain bike\" and tell me the predominant color of the frame of the cheapest bike, ignoring $1 listings.", + "image": null, + "instantiation_dict": { + "phrase": "mountain bike", + "detail": "the predominant color of the frame of the cheapest bike, ignoring $1 listings" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "purple" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=22441", + "intent_template_id": 16 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 162, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category, and find the {{item}}.", + "intent": "Explore the \"Beauty + Health\" category, and find the most recent item that shows a tennis ball.", + "image": null, + "instantiation_dict": { + "category": "Beauty + Health", + "item": "most recent item that shows a tennis ball" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=61809", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": 
"medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 36 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 163, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=18", + "geolocation": null, + "intent_template": "For the {{item}} on this page {{attribute}}, tell me the {{detail}}.", + "intent": "For the item on this page with the Amazon webpage in its image, tell me the price in dollars displayed on the Amazon site.", + "image": null, + "instantiation_dict": { + "item": "item", + "attribute": "with the Amazon webpage in its image", + "detail": "price in dollars displayed on the Amazon site" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.92" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=52143", + "intent_template_id": 54 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 164, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=10&iPage=4y", + "geolocation": null, + "intent_template": "Navigate to the {{item}} on this page whose image {{condition}} as the item in the image provided.", + "intent": "Navigate to the listing on this page whose image includes a car the same color as the item in the image provided.", + "image": "https://images.pexels.com/photos/2872755/pexels-photo-2872755.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "item": "listing", + "condition": "includes a car the same color" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": 
"", + "program_html": [], + "string_note": "There are no yellow items on the page.", + "reference_answer_raw_annotation": "N/A" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 55 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 165, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=10&iPage=8", + "geolocation": null, + "intent_template": "Navigate to the {{item}} on this page whose image {{condition}} as the item in the image provided.", + "intent": "Navigate to the listing on this page whose image includes a car the same color as the item in the image provided.", + "image": "https://images.pexels.com/photos/6944172/pexels-photo-6944172.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "item": "listing", + "condition": "includes a car the same color" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=34939", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 55 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 166, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=21&iPage=11", + "geolocation": null, + "intent_template": "Navigate to the {{item}} on this page whose image {{condition}} as the item in the image provided.", + "intent": "Navigate to the listing on this page whose image includes an instrument the same color as the item in the image provided.", + "image": "https://images.pexels.com/photos/1153655/pexels-photo-1153655.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + 
"instantiation_dict": { + "item": "listing", + "condition": "includes an instrument the same color" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=40109", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 55 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 167, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=21&iPage=8", + "geolocation": null, + "intent_template": "Navigate to the {{item}} on this page whose image {{condition}} as the item in the image provided.", + "intent": "Navigate to the listing on this page whose image includes an instrument the same color as the item in the image provided.", + "image": "https://images.pexels.com/photos/693794/pexels-photo-693794.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "item": "listing", + "condition": "includes an instrument the same color" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no green instruments on the page.", + "reference_answer_raw_annotation": "N/A" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 55 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 168, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the cheapest {{item}} {{condition}}.", + "intent": "Find the cheapest motorcycle which is of the same color as the item in the image provided.", + 
"image": "https://images.pexels.com/photos/2737656/pexels-photo-2737656.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "item": "motorcycle", + "condition": "which is of the same color as the item in the image provided" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=45405", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 32 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 169, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}} that costs {{range}}.", + "intent": "Navigate to the cheapest listing in the \"Cell phones\" category with a black phone that costs more than $1.", + "image": null, + "instantiation_dict": { + "product": "cheapest listing in the \"Cell phones\" category with a black phone", + "range": "more than $1" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=50277", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 56 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 170, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}} which {{condition}} in this image.", + "intent": "Navigate to the most expensive item in the \"Cell phones\" category which includes in its design the character in this image.", + "image": 
"https://images.pexels.com/photos/9482194/pexels-photo-9482194.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "most expensive item in the \"Cell phones\" category", + "condition": "includes in its design the character" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=36313", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 47 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 171, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}}.", + "intent": "Navigate to the oldest listing in the \"Cell phones\" category which has a hot pink screen in the image.", + "image": null, + "instantiation_dict": { + "product": "oldest listing in the \"Cell phones\" category which has a hot pink screen in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=32832", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 57 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 172, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}}.", + "intent": "Navigate to the cheapest listing in the \"Furniture category\" with three blue chairs suitable for a workplace in the image.", + "image": null, + "instantiation_dict": { + "product": "cheapest listing in the \"Furniture 
category\" with three blue chairs suitable for a workplace in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=23701", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 57 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 173, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=17&sOrder=i_price&iOrderType=desc&sShowAs=gallery", + "geolocation": null, + "intent_template": "For the {{item}} on this page {{attribute}}, tell me the {{detail}}.", + "intent": "For the item on this page with a website address in the image, tell me the address.", + "image": null, + "instantiation_dict": { + "item": "item", + "attribute": "with a website address in the image", + "detail": "address" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "kaiyo.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 54 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 174, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=17&sOrder=i_price&iOrderType=desc&sShowAs=gallery", + "geolocation": null, + "intent_template": "For the {{item}} on this page {{attribute}}, tell me the {{detail}}.", + "intent": "For the item on this page which includes a Black Friday logo in the image, tell me the most specific location given of the posting.", + "image": null, + "instantiation_dict": { + "item": "item", + 
"attribute": "which includes a Black Friday logo in the image", + "detail": "most specific location given of the posting" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Shaw", + "Washington, D.C." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 54 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 175, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}} from {{location}} that {{condition}}.", + "intent": "Navigate to the cheapest controller in the \"Video gaming\" category from Ohio or Pennsylvania that is predominantly white.", + "image": null, + "instantiation_dict": { + "product": "cheapest controller in the \"Video gaming\" category", + "location": "Ohio or Pennsylvania", + "condition": "is predominantly white" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=80517", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 58 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 176, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}} from {{location}} that {{condition}}.", + "intent": "Navigate to the most expensive motorcycle from Virginia or Maryland that has both red and blue colors.", + "image": null, + "instantiation_dict": { + "product": "most expensive motorcycle", + "location": 
"Virginia or Maryland", + "condition": "has both red and blue colors" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=78122", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 58 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 177, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}} from {{location}} that {{condition}}.", + "intent": "Navigate to the cheapest listing in the \"Cars + trucks\" category from Ohio or Maryland that is the same color as the objects in this image.", + "image": "https://images.pexels.com/photos/1179229/pexels-photo-1179229.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "cheapest listing in the \"Cars + trucks\" category", + "location": "Ohio or Maryland", + "condition": "is the same color as the objects in this image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=15340", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 58 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 178, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "For the {{item}}, tell me the {{detail}}.", + "intent": "For the cheapest listing in the \"Music instruments\" category with a white guitar from West Virginia or Delaware, tell me the price in dollars.", + "image": 
null, + "instantiation_dict": { + "item": "cheapest listing in the \"Music instruments\" category with a white guitar from West Virginia or Delaware", + "detail": "price in dollars" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "41 |OR| forty one |OR| fourty one" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 53 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 179, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{product}} with {{detail}} from the image {{location}}.", + "intent": "Find the most expensive item in the \"Video gaming\" category with the character on the shirt from the image on its decal.", + "image": "https://images.pexels.com/photos/2560894/pexels-photo-2560894.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "most expensive item in the \"Video gaming\" category", + "detail": "the character on the shirt", + "location": "on its decal" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=6909", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 25 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 180, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{item}} with {{attribute}}. 
Add a {{rating}} rating with title \"{{title}}\" and text \"{{text}}\".", + "intent": "Find the most recently listed coffee maker with a touch screen. Add a 5 star rating with title \"Great item\" and text \"Would recommend!\".", + "image": null, + "instantiation_dict": { + "item": "most recently listed coffee maker", + "attribute": "a touch screen", + "rating": "5 star", + "title": "Great item", + "text": "Would recommend!" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=15810", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Great item by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=15810", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Would recommend!", + "5 of 5" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 35 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 181, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}} with {{detail}} from the image.", + "intent": "Navigate to the most expensive item in the \"Video gaming\" category with the same item from the image.", + "image": "https://images.pexels.com/photos/33571/tutankhamun-death-mask-pharaonic-egypt.jpg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "most expensive item in the \"Video gaming\" category", + "detail": "the same item" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": 
"__CLASSIFIEDS__/index.php?page=item&id=7915", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 59 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 182, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}}.", + "intent": "Navigate to the most recent video game listing with the the Playstation home screen in the image.", + "image": null, + "instantiation_dict": { + "product": "most recent video game listing with the the Playstation home screen in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=41845", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 57 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 183, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}} with {{detail}} from the image.", + "intent": "Navigate to the most recently listed item in the \"Collectibles\" category with the same character from the image.", + "image": "https://images.pexels.com/photos/12695401/pexels-photo-12695401.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "most recently listed item in the \"Collectibles\" category", + "detail": "the same character" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=40185", + "program_html": [], + "url_note": 
"EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 59 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 184, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}} with {{detail}} from the image.", + "intent": "Navigate to the most expensive item in the \"Collectibles\" category with the same exact item from the image.", + "image": "https://images.pexels.com/photos/9661257/pexels-photo-9661257.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "most expensive item in the \"Collectibles\" category", + "detail": "the same exact item" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=18837", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 59 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 185, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me {{product}} that {{condition}} in the image.", + "intent": "Find me the most expensive Collectibles listing that has a listing image that references a sports team for the city in the image.", + "image": "https://images.pexels.com/photos/1750754/pexels-photo-1750754.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "the most expensive Collectibles listing", + "condition": "has a listing image that references a sports team for the city" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, 
+ "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=33317", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 30, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "classifieds" + ], + "task_id": 186, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "For the {{item}}, tell me the {{detail}}.", + "intent": "For the most recently listed item in the \"Books\" category of a single book with a car on the cover, tell me the color of the car.", + "image": null, + "instantiation_dict": { + "item": "most recently listed item in the \"Books\" category of a single book with a car on the cover", + "detail": "color of the car" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "yellow" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=9146", + "intent_template_id": 53 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 187, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=9&iPage=6&sShowAs=gallery", + "geolocation": null, + "intent_template": "Navigate to the {{product}} where {{detail}}.", + "intent": "Navigate to the item on the page where Lightning McQueen is in the image.", + "image": null, + "instantiation_dict": { + "product": "item on the page", + "detail": "Lightning McQueen is in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=79133", + "program_html": [], + 
"url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 60 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 188, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=9&iPage=14&sShowAs=gallery", + "geolocation": null, + "intent_template": "Navigate to the {{product}} where {{detail}}.", + "intent": "Navigate to the book listing on the page where the cover includes a baby.", + "image": null, + "instantiation_dict": { + "product": "book listing on the page", + "detail": "the cover includes a baby" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=55076", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 60 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 189, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=24&sShowAs=gallery&iPage=5", + "geolocation": null, + "intent_template": "Navigate to the {{product}} where {{detail}}.", + "intent": "Navigate to the item on the page where Mario is in the image.", + "image": null, + "instantiation_dict": { + "product": "item on the page", + "detail": "Mario is in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no items with Mario on the page.", + "reference_answer_raw_annotation": "N/A" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "", 
+ "intent_template_id": 60 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 190, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=24&sShowAs=gallery&iPage=5", + "geolocation": null, + "intent_template": "Navigate to the {{product}} with {{detail}} from the image {{location}}.", + "intent": "Navigate to the cheapest video game item on this page with the character on the shirt from the image in its listing image.", + "image": "https://images.pexels.com/photos/2560894/pexels-photo-2560894.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "cheapest video game item on this page", + "detail": "the character on the shirt", + "location": "in its listing image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=29518", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 61 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 191, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=16&sOrder=dt_pub_date&iOrderType=desc&iPage=3&sShowAs=gallery", + "geolocation": null, + "intent_template": "Navigate to the {{product}} on this page {{condition}}.", + "intent": "Navigate to the item on this page with a blonde horse.", + "image": null, + "instantiation_dict": { + "product": "item", + "condition": "with a blonde horse" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no items with a blonde horse on the page.", + 
"reference_answer_raw_annotation": "N/A" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 48 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 192, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=9&sShowAs=gallery&sOrder=i_price&iOrderType=asc&iPage=119", + "geolocation": null, + "intent_template": "For the {{item}} on this page {{attribute}}, tell me the {{detail}}.", + "intent": "For the listing on this page with books of cars, tell me the primary color of the cars.", + "image": null, + "instantiation_dict": { + "item": "listing", + "attribute": "with books of cars", + "detail": "primary color of the cars" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "red", + "white" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__CLASSIFIEDS__/index.php?page=item&id=57831", + "intent_template_id": 54 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 193, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=9&sShowAs=gallery&sOrder=dt_pub_date&iOrderType=desc&iPage=106", + "geolocation": null, + "intent_template": "Navigate to the {{product}} on this page {{condition}}.", + "intent": "Navigate to the item on this page with CDs in the image.", + "image": null, + "instantiation_dict": { + "product": "item", + "condition": "with CDs in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=2753", + "program_html": [], + "url_note": "EXACT" + }, + 
"reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 48 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 194, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=search&sCategory=9&sShowAs=gallery&sOrder=dt_pub_date&iOrderType=desc&iPage=90", + "geolocation": null, + "intent_template": "Navigate to the {{product}} where {{detail}}.", + "intent": "Navigate to the item on the page where Mickey Mouse is in the image.", + "image": null, + "instantiation_dict": { + "product": "item on the page", + "detail": "Mickey Mouse is in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no items with Mickey Mouse on the page.", + "reference_answer_raw_annotation": "N/A" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 60 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 195, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}} that costs {{range}}.", + "intent": "Navigate to the most expensive yellow motorcycle in the \"Motorcycles\" category that costs at least 25,000 dollars.", + "image": null, + "instantiation_dict": { + "product": "most expensive yellow motorcycle in the \"Motorcycles\" category", + "range": "at least 25,000 dollars" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no yellow motorcycles worth at least 25000 dollars.", + 
"reference_answer_raw_annotation": "N/A" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 56 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 196, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}} from {{location}} that {{condition}}.", + "intent": "Navigate to the most expensive red truck in the \"Cars + trucks\" category from Maryland that is at least 50,000 dollars.", + "image": null, + "instantiation_dict": { + "product": "most expensive red truck in the \"Cars + trucks\" category", + "location": "Maryland", + "condition": "is at least 50,000 dollars" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no red trucks from Maryland worth at least 50000 dollars.", + "reference_answer_raw_annotation": "N/A" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 58 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 197, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}} from {{location}} that {{condition}}.", + "intent": "Navigate to the cheapest boat in the \"Boats\" category from Pennsylvania that has red highlights and is at least 30,000 dollars.", + "image": null, + "instantiation_dict": { + "product": "cheapest boat in the \"Boats\" category", + "location": "Pennsylvania", + "condition": "has red highlights and is at least 30,000 dollars" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + 
"reference_url": "__CLASSIFIEDS__/index.php?page=item&id=3256", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 58 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 198, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Navigate to the {{product}} that costs {{range}}.", + "intent": "Navigate to the cheapest watch with hints of faded red and green that costs at least 13,000 dollars.", + "image": null, + "instantiation_dict": { + "product": "cheapest watch with hints of faded red and green", + "range": "at least 13,000 dollars" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=37012", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 56 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 199, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=item&id=43887", + "geolocation": null, + "intent_template": "Tell me the {{product}}.", + "intent": "Tell me the website mentioned in the image.", + "image": null, + "instantiation_dict": { + "product": "website mentioned in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "kaiyo.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 40 + }, + { + "sites": [ + "classifieds" + ], + 
"task_id": 200, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__SHOPPING__/24-pcs-lot-wild-animal-cupcake-wrappers-birthday-cake-party-decoration-supplies-lion-monkey-zebra-elephant.html |AND| __CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} from the classifieds site {{description}} on OneStopMarket.", + "intent": "Show me the latest listing of a pillow from the classifieds site with a pattern matching the top left animal in the first image of the listing on OneStopMarket.", + "image": null, + "instantiation_dict": { + "attribute": "latest", + "item": "listing of a pillow", + "description": "with a pattern matching the top left animal in the first image of the listing" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=65205", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 62 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 201, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} {{condition}}.", + "intent": "Show me the latest listing of a snare drum with a black and red color scheme.", + "image": null, + "instantiation_dict": { + "attribute": "latest", + "item": "listing of a snare drum", + "condition": "with a black and red color scheme" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=50230", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": 
"", + "intent_template_id": 63 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 202, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} {{condition}}.", + "intent": "Show me the cheapest penguin-themed item in West Virginia.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "penguin-themed item", + "condition": "in West Virginia" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=6247", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 63 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 203, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=item&id=41938", + "geolocation": null, + "intent_template": "Help me leave a comment with the title \"Interested\" with the text \"I want to buy this item\", if the item comes with a cable that is able to connect to my USB-C ports, else leave a comment with the same title but with the text \"Do you have a USB-C cable?\".", + "intent": "Help me leave a comment with the title \"Interested\" with the text \"I want to buy this item\", if the item comes with a cable that is able to connect to my USB-C ports, else leave a comment with the same title but with the text \"Do you have a USB-C cable?\".", + "image": null, + "instantiation_dict": {}, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=41938", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + 
"required_contents": { + "must_include": [ + "Interested by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=41938", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Do you have a USB-C cable?" + ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 64 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 204, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{item}} that {{condition}}.", + "intent": "Find the cheapest guitar between $1200 and $1500 that is made of lightly-colored wood and comes with a soft guitar case that has blue-colored insides.", + "image": null, + "instantiation_dict": { + "item": "cheapest guitar between $1200 and $1500", + "condition": "is made of lightly-colored wood and comes with a soft guitar case that has blue-colored insides" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=76469", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 23 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 205, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=item&id=65673 |AND| __CLASSIFIEDS__/index.php?page=item&id=43183", + "geolocation": null, + "intent_template": "Which palette is more value-for-money in terms of price per distinct color? 
Help me make a comment on that one with the title \"Would like to buy\" and text \"Do you accept PayPal?\".", + "intent": "Which palette is more value-for-money in terms of price per distinct color? Help me make a comment on that one with the title \"Would like to buy\" and text \"Do you accept PayPal?\".", + "image": null, + "instantiation_dict": {}, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=43183", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Would like to buy by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=43183", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Do you accept PayPal?" + ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 65 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 206, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{item}} that {{condition}}.", + "intent": "Find the cheapest freezer with a warranty between $250 and $280 that does not have a fridge.", + "image": null, + "instantiation_dict": { + "item": "cheapest freezer with a warranty between $250 and $280", + "condition": "does not have a fridge" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=32949", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 23 + }, + { + "sites": 
[ + "classifieds" + ], + "task_id": 207, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__SHOPPING__/nintendo-switch-oled-model-w-neon-red-neon-blue-joy-con.html |AND| __CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} from the classifieds site {{description}} on OneStopMarket.", + "intent": "Show me the cheapest headphones from the classifieds site that matches the color scheme of the item on OneStopMarket.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "headphones", + "description": "that matches the color scheme of the item" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=44307", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 62 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 208, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=item&id=16826", + "geolocation": null, + "intent_template": "Identify the insect in the picture. Leave a comment with the title \"Questions\" and text containing the insect's identity, with the purpose of confirming with the seller.", + "intent": "Identify the insect in the picture. 
Leave a comment with the title \"Questions\" and text containing the insect's identity, with the purpose of confirming with the seller.", + "image": null, + "instantiation_dict": {}, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=16826", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Questions by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=16826", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "moth |OR| butterfly" + ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 66 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 209, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=item&id=27156", + "geolocation": null, + "intent_template": "What is the {{detail}} that {{product}}?", + "intent": "What is the cheapest price that I can pay for 31 of these wooden pallets?", + "image": null, + "instantiation_dict": { + "detail": "cheapest price", + "product": "I can pay for 31 of these wooden pallets" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "$208" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "Buying in batches 15 is cheaper, a fact which is only spelled out in the image and not the description", + "intent_template_id": 67 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 210, + "require_login": true, + "storage_state": 
"./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me the {{attribute}} {{item}} on this site.", + "intent": "Find me the cheapest lamb in the \"Farm + garden\" category on this site.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "lamb in the \"Farm + garden\" category" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=81060", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 0 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 211, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me the {{attribute}} {{item}} on this site.", + "intent": "Find me the cheapest farm pig in the \"Farm + garden\" category on this site.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "farm pig in the \"Farm + garden\" category" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=72643", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 0 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 212, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me the {{attribute}} {{item}} from the classifieds site that {{condition}}.{{action}}", + "intent": "Find me the cheapest watch or necklace from the classifieds site that is 
from the location \"Pennsylvania\". Do not take the listing price at face value and use the actual item price for comparison, which can be verified in the listing description.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "watch or necklace", + "condition": "is from the location \"Pennsylvania\"", + "action": " Do not take the listing price at face value and use the actual item price for comparison, which can be verified in the listing description." + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=47839", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "The other watch/ring listings on the website are fake listings that state their actual price in the description. Super hard task.", + "intent_template_id": 68 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 213, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=item&id=79640", + "geolocation": null, + "intent_template": "Find me the seller's email if the item depicts Elvis Presley's performance without the audience shown, else leave a comment with the title \"Question\" and text \"Do you have one without the audience?\". Also, repeat the seller's email back to him in the comment's text to confirm it.", + "intent": "Find me the seller's email if the item depicts Elvis Presley's performance without the audience shown, else leave a comment with the title \"Question\" and text \"Do you have one without the audience?\". 
Also, repeat the seller's email back to him in the comment's text to confirm it.", + "image": null, + "instantiation_dict": {}, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=79640", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Question by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=79640", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Do you have one without the audience?", + "joo-won_hernandez19@example.com" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "The phone number is spelled wrongly in the item's description. Expectation is that it should be corrected by the agent performing the task.", + "intent_template_id": 69 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 214, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} {{condition}}.", + "intent": "Show me the most expensive phone with a theme matching that of the image.", + "image": "https://images.pexels.com/photos/3526022/pexels-photo-3526022.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "attribute": "most expensive", + "item": "phone", + "condition": "with a theme matching that of the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=36313", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + 
"comments": "", + "intent_template_id": 63 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 215, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} {{condition}}.", + "intent": "Show me the most expensive camera that is for sale (as confirmed in the listing description).", + "image": null, + "instantiation_dict": { + "attribute": "most expensive", + "item": "camera", + "condition": "that is for sale (as confirmed in the listing description)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=66657", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 63 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 216, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} {{condition}}.", + "intent": "Show me the cheapest oval-shaped table that is between $420 and $430.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "oval-shaped table", + "condition": "that is between $420 and $430" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=66046", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 63 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 217, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": 
"__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me the {{attribute}} {{item}} from the classifieds site that {{condition}}.{{action}}", + "intent": "Find me the most recent listing offering a book titled \"Captain's Logs: The Complete Trek Voyages\" from the classifieds site that is priced between $170 and $200. Leave a comment on the listing with the title \"Interested in buying the book\" and text \"Is the Captain's Log book still available?\".", + "image": null, + "instantiation_dict": { + "attribute": "most recent listing offering", + "item": "a book titled \"Captain's Logs: The Complete Trek Voyages\"", + "condition": "is priced between $170 and $200", + "action": " Leave a comment on the listing with the title \"Interested in buying the book\" and text \"Is the Captain's Log book still available?\"." + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=27617", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Interested in buying the book by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=27617", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Is the Captain's Log book still available?" 
+ ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 68 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 218, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} {{condition}}.", + "intent": "Show me the most expensive collectible historical USA coin that is under $2000.", + "image": null, + "instantiation_dict": { + "attribute": "most expensive", + "item": "collectible historical USA coin", + "condition": "that is under $2000" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=82531", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "It only says it is a USA coin in the image itself.", + "intent_template_id": 63 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 219, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} {{condition}}.", + "intent": "Show me the cheapest coat hanger between $15 and $20 that can stand on its own.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "coat hanger", + "condition": "between $15 and $20 that can stand on its own" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=34863 |OR| __CLASSIFIEDS__/index.php?page=item&id=27330 |OR| __CLASSIFIEDS__/index.php?page=item&id=62449", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + 
"visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 63 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 220, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find the {{item}} that {{condition}}.", + "intent": "Find the cheapest red-colored microwave that is $10 and under and still works.", + "image": null, + "instantiation_dict": { + "item": "cheapest red-colored microwave", + "condition": "is $10 and under and still works" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The only red microwave doesn't turn on" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "The red microwave in question is __CLASSIFIEDS__/index.php?page=item&id=47242", + "intent_template_id": 23 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 221, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=item&id=48575", + "geolocation": null, + "intent_template": "How many {{attribute}} are {{listing detail}}?", + "intent": "How many bowls are there in this set of bowls?", + "image": null, + "instantiation_dict": { + "attribute": "bowls", + "listing detail": "there in this set of bowls" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "6" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 37 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 222, + "require_login": true, + "storage_state": 
"./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=item&id=34501", + "geolocation": null, + "intent_template": "Based on the measuring tape in the image, is the stated diameter of this item correct?", + "intent": "Based on the measuring tape in the image, is the stated diameter of this item correct?", + "image": null, + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "yes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 70 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 223, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=item&id=13215", + "geolocation": null, + "intent_template": "If there is a Santa Claus card or envelope in the listing, help me to comment with the title \"Other cards?\" and with text asking \"Are there any other Santa Claus themed envelopes?\"", + "intent": "If there is a Santa Claus card or envelope in the listing, help me to comment with the title \"Other cards?\" and with text asking \"Are there any other Santa Claus themed envelopes?\"", + "image": null, + "instantiation_dict": {}, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=12085", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Other cards? 
by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=12085", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Are there any other Santa Claus themed envelopes?" + ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 71 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 224, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__ |AND| __SHOPPING__/dawn-ultra-dishwashing-liquid-dish-soap-4x19-4-fl-oz-non-scratch-sponge-2-count-original-scent.html", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} from the classifieds site {{description}} on OneStopMarket.", + "intent": "Show me the cheapest wall rack between $30-40 from the classifieds site that is in the shape of the animal in the item's image on OneStopMarket.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "wall rack between $30-40", + "description": "that is in the shape of the animal in the item's image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=70647", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 62 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 225, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__SHOPPING__/womens-hoodies-clearance-tie-die-printed-hoodies-sweatshirt-long-sleeve-drawstring-pullover-tops-hooded-shirts.html |AND| __CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} from the classifieds site {{description}} on 
OneStopMarket.", + "intent": "Show me the most expensive ring from the classifieds site with the same shape as the shape on the right side of the hoodie shown on OneStopMarket.", + "image": null, + "instantiation_dict": { + "attribute": "most expensive", + "item": "ring", + "description": "with the same shape as the shape on the right side of the hoodie shown" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=66601", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 62 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 226, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__ |AND| __SHOPPING__/5pcs-30ml-cute-mini-portable-gel-silicone-hand-bottle-cartoon-empty-spray-bottle-cosmetic-container-refillable-bottle-hand-soap-bottle.html", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} from the classifieds site {{description}} on OneStopMarket.", + "intent": "Show me the cheapest figurine from the classifieds site that is of the third animal from the left shown in the product image on OneStopMarket.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "figurine", + "description": "that is of the third animal from the left shown in the product image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=8964", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 62 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 227, + "require_login": true, + 
"storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__ |AND| __SHOPPING__/storage-basket-cube-forest-animal-fox-deer-flower-large-collapsible-toys-storage-box-bin-laundry-organizer-for-closet-shelf-nursery-kids-bedroom-15x11x9-5-in-1-pack.html", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} from the classifieds site {{description}} on OneStopMarket.", + "intent": "Show me the cheapest clock from the classifieds site with the green animal at the top of the image on OneStopMarket.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "clock", + "description": "with the green animal at the top of the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=71222", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 62 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 228, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__ |AND| __SHOPPING__/posterazzi-pdxjp6549small-live-photo-print-12-x-24-multi.html", + "geolocation": null, + "intent_template": "Find me the {{attribute}} {{item}} from the classifieds site that {{condition}}.{{action}}", + "intent": "Find me the most recent live plant listing from the classifieds site that is the same plant type as the plant shown in the image on OneStopMarket. 
Then, if there are at least 3 of those plants below 9ft tall shown in the product listing image, comment with the title \"Interested\" and text \"I want 3\".", + "image": null, + "instantiation_dict": { + "attribute": "most recent", + "item": "live plant listing", + "condition": "is the same plant type as the plant shown in the image on OneStopMarket", + "action": " Then, if there are at least 3 of those plants below 9ft tall shown in the product listing image, comment with the title \"Interested\" and text \"I want 3\"." + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=58212", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Interested by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=58212", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "I want 3" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 68 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 229, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=item&id=47824 |AND| __CLASSIFIEDS__/index.php?page=item&id=23338 |AND| __CLASSIFIEDS__/index.php?page=item&id=42770", + "geolocation": null, + "intent_template": "Out of the three tabs, ignoring color, which two {{item}} look the most alike in terms of design? Comment on those two listings, with the title \"Buying\" and text \"Can I get one?\".", + "intent": "Out of the three tabs, ignoring color, which two rings look the most alike in terms of design? 
Comment on those two listings, with the title \"Buying\" and text \"Can I get one?\".", + "image": null, + "instantiation_dict": { + "item": "rings" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=47824", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Buying by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=47824", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Can I get one?" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=42770", + "locator": "func:get_query_text(__page__, '.comments_list h3')", + "required_contents": { + "must_include": [ + "Buying by Blake Sullivan" + ] + } + }, + { + "url": "__CLASSIFIEDS__/index.php?page=item&id=42770", + "locator": "func:get_query_text(__page__, '.comments_list')", + "required_contents": { + "must_include": [ + "Can I get one?" 
+ ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 72 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 230, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__SHOPPING__/sudugo-smart-watch-2021-ver-watches-for-women-1-69-touch-screen-fitness-tracker-with-heart-rate-and-sleep-monitor-ip67-waterproof-smartwatch-calorie-pedometer-activity-tracker-for-android-ios-pink.html |AND| __CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me the {{attribute}} {{item}} from the classifieds site that {{condition}}.{{action}}", + "intent": "Find me the cheapest watch in the Jewelry category from the classifieds site that roughly matches the time shown on the first image of this watch on OneStopMarket, up to an error of plus minus 10 minutes.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "watch in the Jewelry category", + "condition": "roughly matches the time shown on the first image of this watch on OneStopMarket, up to an error of plus minus 10 minutes", + "action": "" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=56727", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 68 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 231, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__REDDIT__/f/Maine/99914/new-tourist-trap |AND| __CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Show me the {{attribute}} {{item}} {{condition}} from the classifieds site that looks the most similar to the flower in the image on Reddit. 
It doesn't have to be the same color.", + "intent": "Show me the cheapest flower in the Farm + garden category under $10 from the classifieds site that looks the most similar to the flower in the image on Reddit. It doesn't have to be the same color.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "flower in the Farm + garden category", + "condition": "under $10" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=69345 |OR| __CLASSIFIEDS__/index.php?page=item&id=10003", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 73 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 232, + "require_login": true, + "storage_state": "./.auth/classifieds_state.json", + "start_url": "__REDDIT__/f/consoles/124577/name-a-better-console-you-can-t |AND| __CLASSIFIEDS__", + "geolocation": null, + "intent_template": "Find me the {{attribute}} {{item}} from the classifieds site that {{condition}}.{{action}}", + "intent": "Find me the cheapest controller from the classifieds site that is meant for the console in the image on the Reddit tab.", + "image": null, + "instantiation_dict": { + "attribute": "cheapest", + "item": "controller", + "condition": "is meant for the console in the image on the Reddit tab", + "action": "" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__CLASSIFIEDS__/index.php?page=item&id=33011", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 68 + }, + { + "sites": [ + "classifieds" + ], + "task_id": 233, + "require_login": true, + "storage_state": 
"./.auth/classifieds_state.json", + "start_url": "__CLASSIFIEDS__/index.php?page=item&id=28914 |AND| __REDDIT__/f/springfieldMO/48540/never", + "geolocation": null, + "intent_template": "Are the characters in the image on Reddit shown in the listing on the classifieds site? If they are, name me the movie that they appear in.", + "intent": "Are the characters in the image on Reddit shown in the listing on the classifieds site? If they are, name me the movie that they appear in.", + "image": null, + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "lion king" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 74 + } +] \ No newline at end of file diff --git a/VAB-WebArena-Lite/config_files/vwa/test_reddit.raw.json b/VAB-WebArena-Lite/config_files/vwa/test_reddit.raw.json new file mode 100644 index 0000000..2a0fb8a --- /dev/null +++ b/VAB-WebArena-Lite/config_files/vwa/test_reddit.raw.json @@ -0,0 +1,8301 @@ +[ + { + "sites": [ + "reddit" + ], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/food", + "geolocation": null, + "intent_template": "I'm trying to find this post. Navigate to the comment section for it.", + "intent": "I'm trying to find this post. 
Navigate to the comment section for it.", + "image": "__REDDIT__/submission_images/a731d0dd8da7f9db30c5b1aa2caddc79237f23b63d01181fa394458536d30cd4.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/food/82896", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "post is near the top, can be done without exact image understanding", + "intent_template_id": 0 + }, + { + "sites": [ + "reddit" + ], + "task_id": 1, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/food", + "geolocation": null, + "intent_template": "I'm trying to find this post. Navigate to the comment section for it.", + "intent": "I'm trying to find this post. Navigate to the comment section for it.", + "image": "__REDDIT__/submission_images/e012f0528f58c57240a4530b9c04f07f9828053c5202d97306d528048fc002c0.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/food/60721", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "need to navigate to the second page to find this", + "intent_template_id": 0 + }, + { + "sites": [ + "reddit" + ], + "task_id": 2, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/movies", + "geolocation": null, + "intent_template": "I'm trying to find this post. Navigate to the comment section for it.", + "intent": "I'm trying to find this post. 
Navigate to the comment section for it.", + "image": "__REDDIT__/submission_images/91ba8efd310cfbb3dd72dcd58183713fe00378019b90ee05555fe62cbae8d46c.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/movies/107714", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "medium: need to navigate to the bottom to find this. /f/movies is not all images, but a mix of posts and images", + "intent_template_id": 0 + }, + { + "sites": [ + "reddit" + ], + "task_id": 3, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/food", + "geolocation": null, + "intent_template": "I'm trying to do some research about the post with this image. Tell me how many comments mention the word \"{{word}}\".", + "intent": "I'm trying to do some research about the post with this image. 
Tell me how many comments mention the word \"spicy\".", + "image": "__REDDIT__/submission_images/24793bc78359a112286f5a18e72b3eb4bf74f1ecc1518ebdce6b201c728b1b44.jpg", + "instantiation_dict": { + "word": "spicy" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "11" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "11" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "product image provided, and need to find the post and count all comments", + "intent_template_id": 1 + }, + { + "sites": [ + "reddit" + ], + "task_id": 4, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "I'm trying to do some research about the post with this image. Tell me how many comments mention the word \"{{word}}\".", + "intent": "I'm trying to do some research about the post with this image. 
Tell me how many comments mention the word \"wheat\".", + "image": "__REDDIT__/submission_images/303ce4a44cd98a77a1d3ed99511e21fcc9973fc8aa5e0382f42f3ed3c598ac2a.jpg", + "instantiation_dict": { + "word": "wheat" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "__REDDIT__/f/OldSchoolCool/121626/in-1982-agnes-denes-cultivated-grew-and-harvested-a-two-acre", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "product image provided, need to navigate to the correct post in /f/OldSchoolCool and count all comments", + "intent_template_id": 1 + }, + { + "sites": [ + "reddit" + ], + "task_id": 5, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "I'm trying to do some research about the post with this image. Tell me how many comments this post has.", + "intent": "I'm trying to do some research about the post with this image. 
Tell me how many comments this post has.", + "image": "__REDDIT__/submission_images/1dca2e0d195036fd9d5bb3416cef4cc061796b48dc8b7a8b4d4d14229340a45e.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "121" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "121" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "hard (even harder for humans): product image provided, and need to identify that the post is in /f/MechanicalKeyboards, and find the correct one", + "intent_template_id": 2 + }, + { + "sites": [ + "reddit" + ], + "task_id": 6, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "I'm trying to do some research about the post with this image. Tell me how many comments this post has.", + "intent": "I'm trying to do some research about the post with this image. Tell me how many comments this post has.", + "image": "__REDDIT__/submission_images/ac7dbdd1f4053d2603a4e72e7eac53a35d1cf80c665889fbf4c2affe4415c996.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "28" + ] + }, + "reference_url": "__REDDIT__/f/food/82940/homemade-sweet-amp-spicy-sticky-chinese-pork-belly", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "28" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "hard (even harder for humans): product image provided, and need to identify that the post is in /f/food, and find the correct one. 
It's several pages in.", + "intent_template_id": 2 + }, + { + "sites": [ + "reddit" + ], + "task_id": 7, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "I'm doing some research about the post with this image. Navigate to the comment url of the recipe posted by the OP in the comments.", + "intent": "I'm doing some research about the post with this image. Navigate to the comment url of the recipe posted by the OP in the comments.", + "image": "__REDDIT__/submission_images/e1eaca118ba95fee4a25afbba5bda97e46f53b6abfffba484a79961e2058f9bd.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The OP did not provide a recipe", + "reference_answer_raw_annotation": "The OP did not provide a recipe" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "product image provided, and need to find the post and check if a recipe is provided (it is not)", + "intent_template_id": 3 + }, + { + "sites": [ + "reddit" + ], + "task_id": 8, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "I'm doing some research about the post with this image. Navigate to the comment url of the recipe posted by the OP in the comments.", + "intent": "I'm doing some research about the post with this image. 
Navigate to the comment url of the recipe posted by the OP in the comments.", + "image": "__REDDIT__/submission_images/24793bc78359a112286f5a18e72b3eb4bf74f1ecc1518ebdce6b201c728b1b44.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/food/104449/-/comment/1958174", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "product image provided, and need to find the post and check if a recipe is provided (it is)", + "intent_template_id": 3 + }, + { + "sites": [ + "reddit" + ], + "task_id": 9, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/food", + "geolocation": null, + "intent_template": "I'm doing some research about the post with this image. Navigate to the comment url of the recipe posted by the OP in the comments.", + "intent": "I'm doing some research about the post with this image. 
Navigate to the comment url of the recipe posted by the OP in the comments.", + "image": "__REDDIT__/submission_images/f85e45ccbb6417ca67872694cb4e9906fb5d52415517b5a82ccd4be43df2bce5.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/food/125449/-/comment/2293962 |OR| __REDDIT__/f/food/125449/-/comment/2305083", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "product image provided, and need to find the post (starting from /f/food) and check if a recipe is provided", + "intent_template_id": 3 + }, + { + "sites": [ + "reddit" + ], + "task_id": 10, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/food", + "geolocation": null, + "intent_template": "I'm doing some research about the post with this image. Navigate to the comment url of the recipe posted by the OP in the comments.", + "intent": "I'm doing some research about the post with this image. 
Navigate to the comment url of the recipe posted by the OP in the comments.", + "image": "__REDDIT__/submission_images/95279e58f603d7bc121dd49b061440053ebe8c29496c36487cb9005ba7b37515.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/food/39576/-/comment/720148", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "product image provided, and need to find the post (need to paginate starting from /f/food) and check if a recipe is provided (it is not)", + "intent_template_id": 3 + }, + { + "sites": [ + "reddit" + ], + "task_id": 11, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Can you help me find and navigate to the comments section of the post with this image?", + "intent": "Can you help me find and navigate to the comments section of the post with this image?", + "image": "__REDDIT__/submission_images/534655c4f627b6bfc41f279290a8f0dec6616d24ed7baa2a7b7054326fdd4269.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pittsburgh/45900", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "hard: product image provided, need to identify that it's of Pittsburgh, navigate to that subreddit, and click on the post", + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 12, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Can you help me find and navigate to the comments 
section of the post with this image?", + "intent": "Can you help me find and navigate to the comments section of the post with this image?", + "image": "__REDDIT__/submission_images/756ea348e1bbd91001c7dc43f1c0f5f1bcd744691f12eaf535e08b680a5a8f37.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pittsburgh/46057", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "hard: product image provided, need OCR to identify that it's of Pittsburgh, navigate to that subreddit, and click on the post", + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 13, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Can you help me find and navigate to the comments section of the post with this image?", + "intent": "Can you help me find and navigate to the comments section of the post with this image?", + "image": "__REDDIT__/submission_images/dc079dc0eafe6bf8dbf1493fb9d59f6868c65ac5c3fafb71f4e6e8748d85b71b.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/Newark/35719", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "hard: product image provided, need to identify that it's of Newark, navigate to that subreddit, and click on the post", + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 14, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Can you help me 
find and navigate to the comments section of an image post that has something like this image?", + "intent": "Can you help me find and navigate to the comments section of an image post that has something like this image?", + "image": "__REDDIT__/submission_images/4b2b1b0890ee42f1cc7447e925bcc50d37de165254e268c1e792ef8cb30ac338.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__row", + "eval_vqa": [ + { + "question": "Is this a photo taken at night? (yes/no)", + "answer": "yes" + }, + { + "question": "Is this photo of a city skyline? (yes/no)", + "answer": "yes" + } + ] + } + ], + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "medium: product image provided, need to find something similar out of all subreddits", + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 15, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Can you help me find and navigate to the comments section of an image post that has something like this image?", + "intent": "Can you help me find and navigate to the comments section of an image post that has something like this image?", + "image": "coco_images/000000000285.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__row", + "eval_vqa": [ + { + "question": "Is this a photo of a bear? 
(yes/no)", + "answer": "yes" + } + ] + } + ], + "reference_url": "__REDDIT__/f/aww/17116", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "medium: coco image provided, need to find something similar out of all subreddits", + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 16, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "What {{question}} {{modifier}}?", + "intent": "What month and year (in mm-yyyy format) was this image posted in the /f/pics subreddit?", + "image": "__REDDIT__/submission_images/00d2341392f787ba15960934bea08a81090053c0ac17458c37c743a2eacb9f66.jpg", + "instantiation_dict": { + "question": "month and year (in mm-yyyy format)", + "modifier": "was this image posted in the /f/pics subreddit" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "10-2022" + ] + }, + "reference_url": "__REDDIT__/f/pics/25017/my-oil-painting-of-mcdonald-s-filet-o-fish", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "October 2022" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "hard: product image provided, and need to find the post and identify the date", + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 17, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Navigate to a subreddit where I can find the most similar content to this image.", + "intent": "Navigate to a subreddit where I can find the most similar content to this image.", + "image": 
"__REDDIT__/submission_images/43a7a1ca5d08b16fcb11926a7bcc4b16476ed8e62a55bdd917b1020f2510f2e2.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/memes", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "medium: product image provided, and need to find the meme subreddit", + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 18, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Navigate to a subreddit where I can find the most similar content to this image.", + "intent": "Navigate to a subreddit where I can find the most similar content to this image.", + "image": "__REDDIT__/submission_images/7a1e98e9fa4016d835815a04aa9375d82955931805163defaa185c8cf8217b47.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/MechanicalKeyboards", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "medium: product image provided, and need to find the mechanical keyboards subreddit", + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 19, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Find and upvote this post. I think it was posted in {{forum}}.", + "intent": "Find and upvote this post. 
I think it was posted in /f/GetMotivated.", + "image": "__REDDIT__/submission_images/1c42177287fc4285f7ff6755c520e8484b35aaf63e764f2aaa93764195cdcde0.jpg", + "instantiation_dict": { + "forum": "/f/GetMotivated" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/GetMotivated/76904/image-you-are-tomorrow", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "hard: product image provided, need to find the correct post and upvote it", + "intent_template_id": 8 + }, + { + "sites": [ + "reddit" + ], + "task_id": 20, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Find and upvote this post. I think it was posted in {{forum}}.", + "intent": "Find and upvote this post. 
I think it was posted in /f/space.", + "image": "__REDDIT__/media/cache/resolve/submission_thumbnail_1x/a1920694edca7656fc929bab1166d2d667181e3aa495147cbe319dbadcd6cccb.jpg", + "instantiation_dict": { + "forum": "/f/space" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/space/91926/milkdromeda", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "hard: product image provided, need to find the correct post (second page in /f/space) and upvote it", + "intent_template_id": 8 + }, + { + "sites": [ + "reddit" + ], + "task_id": 21, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Find and upvote this post. I think it was posted in {{forum}}.", + "intent": "Find and upvote this post. 
I think it was posted in /f/OldSchoolCool.", + "image": "__REDDIT__/submission_images/97414d47ab9fe6c197aaa93f5157a80d59db4df0cbc9b683c7cb5aa9ad326e0e.jpg", + "instantiation_dict": { + "forum": "/f/OldSchoolCool" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/OldSchoolCool/56995/goerge-clooney-1990", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "hard: product image provided, need to find the correct post (third page in /f/OldSchoolCool) and upvote it", + "intent_template_id": 8 + }, + { + "sites": [ + "reddit" + ], + "task_id": 22, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Find this post and tell me what the top comment says.", + "intent": "Find this post and tell me what the top comment says.", + "image": "__REDDIT__/submission_images/38fa7f08e147044f478f01b6a5a01d0798abf168016a3b38d2cc84779cb210e1.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Very rare. I swear last time i saw these on ebay they were like \u00a3700" + ] + }, + "reference_url": "__REDDIT__/f/consoles/103611", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Very rare. 
I swear last time i saw these on ebay they were like \u00a3700" + }, + "intent_template_id": 9, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "hard: dark product image provided, need to find the correct post and identify the top comment. __REDDIT__/f/consoles/103611/-/comment/1742498" + }, + { + "sites": [ + "reddit" + ], + "task_id": 23, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Find this post and tell me what the top comment says.", + "intent": "Find this post and tell me what the top comment says.", + "image": "__REDDIT__/submission_images/2319bc580d861d7543159a48d1bc6085b1e62bfddb0673b35c09f924fec3e9ba.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "https://imgur.com/a/P1YyboM" + ] + }, + "reference_url": "__REDDIT__/f/photoshopbattles/24816/-/comment/155650", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "https://imgur.com/a/P1YyboM" + }, + "intent_template_id": 9, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "hard: image provided, need to find the correct post and identify the top comment." 
+ }, + { + "sites": [ + "reddit" + ], + "task_id": 24, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Find this post and tell me what the top comment says.", + "intent": "Find this post and tell me what the top comment says.", + "image": "__REDDIT__/submission_images/c00998a8eba2c76e09451d6a01f93052f2aebd87808db67e451151aa22467578.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Nah fam, that's a", + "psychonaut" + ] + }, + "reference_url": "__REDDIT__/f/pics/45632/-/comment/792645", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Nah fam, that's a psychonaut" + }, + "intent_template_id": 9, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "hard: image provided, need to find the correct post and identify the top comment." + }, + { + "sites": [ + "reddit" + ], + "task_id": 25, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Find this post and tell me what the top comment says.", + "intent": "Find this post and tell me what the top comment says.", + "image": "__REDDIT__/submission_images/bf9827860b0a877f292bbc9082ecadada26d3a8757982478c30c1a121de15bcd.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Pompadour Cotinga I believe. On the top 10 list of birds I will never see in my garden." + ] + }, + "reference_url": "__REDDIT__/f/pics/45697/-/comment/794907", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Pompadour Cotinga I believe. On the top 10 list of birds I will never see in my garden." 
+ }, + "intent_template_id": 9, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "hard: image provided, need to find the correct post and identify the top comment. " + }, + { + "sites": [ + "reddit" + ], + "task_id": 26, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Find this post and tell me what the top comment says.", + "intent": "Find this post and tell me what the top comment says.", + "image": "https://images.pexels.com/photos/9782305/pexels-photo-9782305.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "This post does not exist on the site.", + "reference_answer_raw_annotation": "This post does not exist on the site." + }, + "intent_template_id": 9, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "hard: image provided, need to identify that there is no such post" + }, + { + "sites": [ + "reddit" + ], + "task_id": 27, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Make a Reddit post with this image ({{image_url}}) in the most relevant subreddit, and navigate to its comments section.", + "intent": "Make a Reddit post with this image (https://images.pexels.com/photos/14267038/pexels-photo-14267038.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2) in the most relevant subreddit, and navigate to its comments section.", + "image": "https://images.pexels.com/photos/14267038/pexels-photo-14267038.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "image_url": 
"https://images.pexels.com/photos/14267038/pexels-photo-14267038.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/MechanicalKeyboards", + "url_note": "GOLD in PRED", + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__content", + "eval_fuzzy_image_match": "https://images.pexels.com/photos/14267038/pexels-photo-14267038.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2" + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "hard: existing models probably cannot handle this task because of screen browsing/navigation capabilities", + "intent_template_id": 10 + }, + { + "sites": [ + "reddit" + ], + "task_id": 28, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Write a story about this picture and post it in the most relevant subreddit.", + "intent": "Write a story about this picture and post it in the most relevant subreddit.", + "image": "coco_images/000000515982.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pics", + "program_html": [ + { + "url": "last", + "locator": "func:get_query_text(__page__, '.submission__inner')", + "required_contents": { + "must_include": [ + "baseball" + ] + } + }, + { + "url": "last", + "locator": "func:get_query_text(__page__, '.submission__submitter')", + "required_contents": { + "must_include": [ + "MarvelsGrantMan136" + ] + } + }, + { + "url": "last", + "locator": "func:get_query_text(__page__, '.submission__info')", + "required_contents": { + "must_include": [ + "pics |OR| sports" + ] + } + } + ], + 
"page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "coco_images/000000515982.jpg" + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 29, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Write a story about this picture and post it in the most relevant subreddit.", + "intent": "Write a story about this picture and post it in the most relevant subreddit.", + "image": "https://images.pexels.com/photos/1170986/pexels-photo-1170986.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pics", + "program_html": [ + { + "url": "last", + "locator": "func:get_query_text(__page__, '.submission__inner')", + "required_contents": { + "must_include": [ + "cat" + ] + } + }, + { + "url": "last", + "locator": "func:get_query_text(__page__, '.submission__submitter')", + "required_contents": { + "must_include": [ + "MarvelsGrantMan136" + ] + } + }, + { + "url": "last", + "locator": "func:get_query_text(__page__, '.submission__info')", + "required_contents": { + "must_include": [ + "pics |OR| aww" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "https://images.pexels.com/photos/1170986/pexels-photo-1170986.jpeg" + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 30, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": 
"__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Find the user who posted this and navigate to the comments section of one of their other posts.", + "intent": "Find the user who posted this and navigate to the comments section of one of their other posts.", + "image": "__REDDIT__/submission_images/4f93a8def3912dd323c901b33d9ee862138f15152632d91f6f2fff0d2ff0a9e6.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/gaming/105429", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "hard: need to navigate to the appropriate subreddit (based off gaming content), identify the correct user, and navigate to their other posts. Original URL: __REDDIT__/f/gaming/126114/oc-loot", + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 31, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Find the user who posted this and navigate to the comments section of one of their other posts.", + "intent": "Find the user who posted this and navigate to the comments section of one of their other posts.", + "image": "__REDDIT__/submission_images/d3333ad4839961f9f765b514279fc0730c9c7fee194ecd3fd069e3f15ef86234.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "This user does not have other posts.", + "reference_answer_raw_annotation": "This user does not have other posts." 
+ }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "hard: need to navigate to the appropriate subreddit, identify the correct user, and find out that they don't have other posts (__REDDIT__/f/pics/89308/the-tropical-rainbow-lobster-panulirus-ornatus)", + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 32, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Find the user who posted this and navigate to the comments section of one of their other posts.", + "intent": "Find the user who posted this and navigate to the comments section of one of their other posts.", + "image": "__REDDIT__/submission_images/bd60b4847804344faecd02aaaa6cb280024a63cd3610283ab2de3406f3f75cf8.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/nyc/44500 |OR| __REDDIT__/f/nyc/44527 |OR| __REDDIT__/f/pics/89551 |OR| __REDDIT__/f/pics/110950 |OR| __REDDIT__/f/nyc/130377", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "hard: need to navigate to the appropriate subreddit (based off DC Wharf), identify the correct user, and navigate to their other posts", + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 33, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Find the user who posted this and navigate to the comments section of one of their other posts.", + "intent": "Find the user who posted this and navigate to the comments section of one of their other posts.", + "image": 
"__REDDIT__/submission_images/7a0c19ac8feaf6539c069a1ddbdda91e491fdacd6980a34d205d94b4fcc08ebb.gif", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/gaming/126203/last-of-us-on-pc-part-ii", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "hard: need to navigate to the appropriate subreddit (based off gaming content), identify the correct user, and navigate to their other posts (__REDDIT__/f/gaming/126063/last-of-us-on-pc)", + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 34, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Navigate to the comments section of the {{ranking}} image post in the {{subreddit}} subreddit that {{attribute}}.", + "intent": "Navigate to the comments section of the all time top ranked image post in the /f/boston subreddit that is a photo of the city.", + "image": null, + "instantiation_dict": { + "ranking": "all time top ranked", + "subreddit": "/f/boston", + "attribute": "is a photo of the city" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/boston/124422", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "medium: need to navigate to the correct subreddit and identify the first post that contains a picture of the city", + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 35, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": 
"Navigate to the comments section of the {{ranking}} image post in the {{subreddit}} subreddit that {{attribute}}.", + "intent": "Navigate to the comments section of the latest image post in the /f/washingtondc subreddit that contains trees.", + "image": null, + "instantiation_dict": { + "ranking": "latest", + "subreddit": "/f/washingtondc", + "attribute": "contains trees" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/washingtondc/136819", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "need to navigate to the correct subreddit and identify the first post that contains a picture of trees (second page)", + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 36, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Navigate to the comments section of the {{ranking}} image post in the {{subreddit}} subreddit that {{attribute}}.", + "intent": "Navigate to the comments section of the all time top ranked image post in the /f/nyc subreddit that contains an animal.", + "image": null, + "instantiation_dict": { + "ranking": "all time top ranked", + "subreddit": "/f/nyc", + "attribute": "contains an animal" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/nyc/44619", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "need to navigate to the correct subreddit and identify the top post that contains a picture of animals (there is a lanternfly costume which is a distractor)", + "intent_template_id": 13 + }, + { + "sites": [ 
+ "reddit" + ], + "task_id": 37, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Navigate to the comments section of the {{ranking}} image post in the {{subreddit}} subreddit that {{attribute}}.", + "intent": "Navigate to the comments section of the latest image post in the /f/Art subreddit that contains animals.", + "image": null, + "instantiation_dict": { + "ranking": "latest", + "subreddit": "/f/Art", + "attribute": "contains animals" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/Art/116643", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "need to navigate to the correct subreddit and identify the first post that contains a picture of animals (not described in text)", + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 38, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "I'd like to find the subreddit for the city this photo was taken in. Can you navigate to it?", + "intent": "I'd like to find the subreddit for the city this photo was taken in. 
Can you navigate to it?", + "image": "https://images.pexels.com/photos/11355109/pexels-photo-11355109.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/jerseycity", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "hard: need to identify that the picture is of Jersey City, and navigate there", + "intent_template_id": 14 + }, + { + "sites": [ + "reddit" + ], + "task_id": 39, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "I'd like to find the subreddit for the city this photo was taken in. Can you navigate to it?", + "intent": "I'd like to find the subreddit for the city this photo was taken in. Can you navigate to it?", + "image": "https://images.pexels.com/photos/208745/pexels-photo-208745.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "San Francisco does not have a subreddit.", + "reference_answer_raw_annotation": "San Francisco does not have a subreddit." + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "Need to identify that the picture is of San Francisco (easy), and figure out that it doesn't have a subreddit", + "intent_template_id": 14 + }, + { + "sites": [ + "reddit" + ], + "task_id": 40, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "I'd like to find the subreddit for the city this photo was taken in. 
Can you navigate to it?", + "intent": "I'd like to find the subreddit for the city this photo was taken in. Can you navigate to it?", + "image": "https://images.pexels.com/photos/18221027/pexels-photo-18221027/free-photo-of-aerial-photo-of-pittsburgh-downtown-skyline.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pittsburgh", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "Need to identify that the picture is of Pittsburgh (easy/medium), and navigate there", + "intent_template_id": 14 + }, + { + "sites": [ + "reddit" + ], + "task_id": 41, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Navigate to the comments section of the first image post in {{subreddit}} that {{attribute}}.", + "intent": "Navigate to the comments section of the first image post in /f/Newark as sorted by hot that does not have people in it.", + "image": null, + "instantiation_dict": { + "subreddit": "/f/Newark as sorted by hot", + "attribute": "does not have people in it" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/Newark/100584", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 42, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Navigate to the comments section of the first image post in 
{{subreddit}} that {{attribute}}.", + "intent": "Navigate to the comments section of the first image post in /f/MechanicalKeyboards as sorted by hot that does not have a keyboard in its photo.", + "image": null, + "instantiation_dict": { + "subreddit": "/f/MechanicalKeyboards as sorted by hot", + "attribute": "does not have a keyboard in its photo" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/MechanicalKeyboards/78235", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 15 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 43, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__SHOPPING__ |AND| __REDDIT__/forums", + "geolocation": null, + "intent_template": "Gather the title of {{product}} from OneStopShop, and post its full product name in the {{subreddit}} subreddit under the title \"{{post_title}}\"", + "instantiation_dict": { + "product": "the cheapest yellow video projector in the \"Video Projectors\" category", + "subreddit": "/f/gadgets", + "post_title": "Has anyone bought this product?" + }, + "intent": "Gather the title of the cheapest yellow video projector in the \"Video Projectors\" category from OneStopShop, and post its full product name in the /f/gadgets subreddit under the title \"Has anyone bought this product?\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/gadgets", + "url_note": "GOLD in PRED", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "func:get_query_text(__page__, '.submission__title')", + "required_contents": { + "exact_match": "Has anyone bought this product?" 
+ } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "func:get_query_text(__page__, '.submission__body')", + "required_contents": { + "must_include": [ + "RUIQIMAO Mini Projector Portable 1080P Home Cinema Projector Theater Video 100''Display LCD HiFi Audio for TV Box/Xbox/TF Card/U Disk P2L1" + ] + } + } + ] + }, + "intent_template_id": 16, + "comments": "__SHOPPING__/ruiqimao-mini-projector-portable-1080p-home-cinema-projector-theater-video-100-display-lcd-hifi-audio-for-tv-box-xbox-tf-card-u-disk-p2l1.html", + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 44, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__SHOPPING__ |AND| __REDDIT__/forums", + "geolocation": null, + "intent_template": "Find an image of {{product}} from OneStopShop, and post it as an image in the {{subreddit}} subreddit with the title as the name of the piece and the artist's name.", + "instantiation_dict": { + "product": "the most expensive artwork from the Posters & Prints category", + "subreddit": "/f/Art" + }, + "intent": "Find an image of the most expensive artwork from the Posters & Prints category from OneStopShop, and post it as an image in the /f/Art subreddit with the title as the name of the piece and the artist's name.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/Art", + "url_note": "GOLD in PRED", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "func:get_query_text(__page__, '.submission__title')", + "required_contents": { + "must_include": [ + "Frozen Shores", + "Marion Griese" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": 
"__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09NRZ3LZW.0.jpg" + } + ] + }, + "intent_template_id": 17, + "comments": "__SHOPPING__/fine-art-canvas-frozen-shores-canvas-print-artwork-by-marion-griese-45-w-x-60-h-vertical-gallery-wrapped-large-wall-decor.html", + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 45, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/60156 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the total area in square miles, of the states that are ranked as having 150+ endangered species? Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the total area in square miles, of the states", + "modifier": "are ranked as having 150+ endangered species" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "561400 |OR| 561,400" + ] + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "Texas (268,596) + Alabama (52,419) + Florida (65,758) + California (163,696) + Hawaii (10,931) = 561400 square miles" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 46, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/103854 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}? 
Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the 2022 total nominal GDP of the area that produces most sugarcane in the year of 2021? (in billion)? Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the 2022 total nominal GDP of the area", + "modifier": "produces most sugarcane in the year of 2021? (in billion)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "39000 |OR| 39,000" + ] + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Asia: 39 trillion" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 47, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/103905 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the area (in km^2) of the country that emitted second most plastic waste into the ocean? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the area (in km^2) of the country", + "modifier": "emitted second most plastic waste into the ocean" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "3287263 |OR| 3,287,263" + ] + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/India: 3,287,263 km^2" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 48, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the absolute difference between NVIDIA's FY22 net income listed in wikipedia and reddit forum 'dataisbeautiful' (in billions)? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the absolute difference between NVIDIA's FY22 net income listed in wikipedia and reddit forum 'dataisbeautiful' (in billions)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "5.35" + ] + } + }, + "intent_template_id": 19, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Nvidia: 9.75B, __REDDIT__/f/dataisbeautiful/124787/oc-nvidia-s-fy22-income-statement: 4.4B" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 49, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/82181 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the first year the Summer Olympics include the women's version of the sport that had the largest average male athlete weight in the 2012 Olympics? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the first year the Summer Olympics include the women's version of the sport", + "modifier": "had the largest average male athlete weight in the 2012 Olympics" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1976" + ] + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Rowing_(sport): ...The introduction of women's rowing at the 1976 Summer Olympics in Montreal increased..." + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 50, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/18254 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the name of the major commercial airport in the state that is colored green? Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the name of the major commercial airport in the state", + "modifier": "is colored green" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Seattle-Tacoma |OR| Sea-Tac" + ] + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Washington_(state): ... Seattle-Tacoma International Airport (Sea-Tac) is the major commercial airport of greater Seattle..." 
+ }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 51, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/18267 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the ISIN of the company that occupies the largest portion in Warren Buffet's portfolio? Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the ISIN of the company", + "modifier": "occupies the largest portion in Warren Buffet's portfolio" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "US0378331005" + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Apple_Inc." + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 52, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/124826 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the largest ethnic group in the country that has the least average number of physicians per person? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the largest ethnic group in the country", + "modifier": "has the least average number of physicians per person" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Hausa" + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Niger" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 53, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/60171 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What was the total asset value (in trillions of USD, to 3 decimal places) of the fifth largest employer in China in 2017? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "was the total asset value (in trillions of USD, to 3 decimal places)", + "modifier": "of the fifth largest employer in China in 2017" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0.585" + ] + } + }, + "intent_template_id": 20, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/State_Grid_Corporation_of_China, US$ 585.278 billion (2017)" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 54, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/18215 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "When {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "When did programming language that has the largest variance in salary first appear? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "did programming language", + "modifier": "has the largest variance in salary first appear" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "August 1993" + } + }, + "intent_template_id": 21, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/R_(programming_language)" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 55, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/38920 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "Who {{question}}, {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "Who is the first spouse of the second most philanthropic billionaire, measured in percentage of wealth donated in 2022? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the first spouse of the second most philanthropic billionaire", + "modifier": "measured in percentage of wealth donated in 2022" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Annaliese Witschak" + } + }, + "intent_template_id": 22, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/George_Soros" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 56, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/124820 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What are the three major geographical divisions of the state that has the third highest number of bank failure? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "are the three major geographical divisions of the state", + "modifier": "has the third highest number of bank failure" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Northern", + "Central", + "Southern", + "Illinois" + ] + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Illinois, Section Divisions" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 57, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/124818 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the sum of the area, in sq km, of the top two countries that has the most uniformly dense transportation network? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the sum of the area, in sq km, of the top two countries", + "modifier": "has the most uniformly dense transportation network" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "387711 |OR| 387,711" + ] + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Belgium: 30689, __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Germany: 357022" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 58, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/38990 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "Who {{question}} {{modifier}}?", + "intent": "Who is the author of the most popular novel adapted anime in year 2012?", + "image": [], + "instantiation_dict": { + "question": "is the author", + "modifier": "of the most popular novel adapted anime in year 2012" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Reki Kawahara" + } + }, + "intent_template_id": 23, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Sword_Art_Online" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 59, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/39014 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that 
{{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the total employees (in 2021) of the accounting firms that provide both tax and legal services? Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the total employees (in 2021) of the accounting firms", + "modifier": "provide both tax and legal services" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "876374 |OR| 876,374" + ] + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Deloitte: 345374, __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/PricewaterhouseCoopers: 295000, __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/KPMG: 236000" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 60, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/82296 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the ISBN of the genre fiction book that takes OP the longest time to read in 2022? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the ISBN of the genre fiction book", + "modifier": "takes OP the longest time to read in 2022" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "0385302304" + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Outlander_(novel)" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 61, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/103828 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "Who {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "Who directed the movie that is shown as the background picture of the third highest earning movie production companies? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "directed the movie", + "modifier": "is shown as the background picture of the third highest earning movie production companies" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Sam Mendes" + } + }, + "intent_template_id": 24, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Skyfall" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 62, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/82234 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "Who {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "Who potrayed the character in friends that has the most intimate connections? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "potrayed the character in friends", + "modifier": "has the most intimate connections" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Lisa Kudrow" + } + }, + "intent_template_id": 24, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Phoebe_Buffay" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 63, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/82193 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the highest elevation (in ft) of the home state of the team that surpassed Buffalo Bills in both defensive rating and offensive rating during the 2022 Season? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the highest elevation (in ft) of the home state of the team", + "modifier": "surpassed Buffalo Bills in both defensive rating and offensive rating during the 2022 Season" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "3213 |OR| 3,213" + ] + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Philadelphia_Eagles" + }, + { + "sites": [ + "reddit" + ], + "task_id": 64, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/60133", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}?", + "intent": "What are the colors of the flag used in the country that is the third largest producer of cherries in year 2012?", + "image": [], + "instantiation_dict": { + "question": "are the colors of the flag used in the country", + "modifier": "is the third largest producer of cherries in year 2012" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "red", + "green", + "white" + ] + } + }, + "intent_template_id": 25, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Italy" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 65, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/82170 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}? 
Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the total city population in 2021 of the two cities that the densest populated in the population density map of Portugal? Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the total city population in 2021 of the two cities", + "modifier": "the densest populated in the population density map of Portugal" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "776813 |OR| 776,813" + ] + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Porto (231962), __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Lisbon (544851)" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 66, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/124984 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "What is the third most popular non-english language (as of 2016) in the state that has the deepest snow during 2022-2023 winter? 
Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the third most popular non-english language (as of 2016) in the state", + "modifier": "has the deepest snow during 2022-2023 winter" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Tagalog" + } + }, + "intent_template_id": 18, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/California" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 67, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/60120 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "Who {{question}} that {{modifier}}?", + "intent": "Who is the founder of the brand that has zero stores in Washington state (before 2021) according to the chart in the first tab?", + "image": [], + "instantiation_dict": { + "question": "is the founder of the brand", + "modifier": "has zero stores in Washington state (before 2021) according to the chart in the first tab" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Bill Rosenberg" + } + }, + "intent_template_id": 26, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Dunkin'_Donuts" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 68, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/dataisbeautiful/124889 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": 
"Who {{question}}, {{modifier}}? Answer using the information from the Wikipedia site in the second tab.", + "intent": "Who is the founder of the second largest asset management firm, as measured by AUM in the end of 2022? Answer using the information from the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "question": "is the founder of the second largest asset management firm", + "modifier": "as measured by AUM in the end of 2022" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "John", + "Bogle" + ] + } + }, + "intent_template_id": 22, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/The_Vanguard_Group" + }, + { + "sites": [ + "reddit" + ], + "task_id": 69, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/newhampshire/129011", + "geolocation": null, + "intent_template": "Make a comment in this post {{criteria}}.", + "intent": "Make a comment in this post explaining what the picture is about.", + "image": [], + "instantiation_dict": { + "criteria": "explaining what the picture is about" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/newhampshire/129011", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "fuzzy_match": "The picture is making a joke about New Hampshire being too cold by exaggerating that spring doesn't exist there." 
+ } + } + ] + }, + "intent_template_id": 27, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "reddit" + ], + "task_id": 70, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics/new", + "geolocation": null, + "intent_template": "How many comments are {{status}} in the post that {{post_modifier}}", + "intent": "How many comments are removed or deleted in the post that has a turkey picture in the hot posts of f/pics", + "image": [], + "instantiation_dict": { + "status": "removed or deleted", + "post_modifier": "has a turkey picture in the hot posts of f/pics" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16" + ] + } + }, + "intent_template_id": 28, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "__REDDIT__/f/pics/45604. Counting is hard." + }, + { + "sites": [ + "reddit" + ], + "task_id": 71, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/memes/new", + "geolocation": null, + "intent_template": "Find me {{description}} of {{object}}. Then, {{action}}.", + "intent": "Find me the most recent animated gif of spiderman in f/memes. 
Then, leave a comment that describes what the spiderman is doing in the meme.", + "image": [], + "instantiation_dict": { + "description": "the most recent animated gif", + "object": "spiderman in f/memes", + "action": "leave a comment that describes what the spiderman is doing in the meme" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/memes/127908", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "fuzzy_match": "The spiderman is flying." + } + } + ] + }, + "intent_template_id": 29, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "__REDDIT__/f/memes/127908, fourth page under chronological order: __REDDIT__/f/memes/new?next%5Bid%5D=127916" + }, + { + "sites": [ + "reddit" + ], + "task_id": 72, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/memes/127531", + "geolocation": null, + "intent_template": "Make a comment in this post {{criteria}}.", + "intent": "Make a comment in this post explaining what the picture is about.", + "image": [], + "instantiation_dict": { + "criteria": "explaining what the picture is about" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/memes/127531", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "fuzzy_match": "The picture is saying that everything is disappointing, regardless if you have a job or are unemployed." 
+ } + } + ] + }, + "intent_template_id": 27, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "reddit" + ], + "task_id": 73, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a post in {{forum}} that {{description}}. Then, repost the picture to {{forum2}} with {{characteristic}}. Finally, {{action}}.", + "intent": "Find a post in f/MechanicalKeyboards that contains a special keyboard that has a huge W key. Then, repost the picture to f/gaming with title \"WwWwWwWwWwwWw all the way\". Finally, navigate to the post.", + "image": [], + "instantiation_dict": { + "forum": "f/MechanicalKeyboards", + "description": "contains a special keyboard that has a huge W key", + "forum2": "f/gaming", + "characteristic": "title \"WwWwWwWwWwwWw all the way\"", + "action": "navigate to the post" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query", + "url_match" + ], + "reference_answers": null, + "reference_url": "f/gaming", + "url_note": "GOLD in PRED", + "program_html": [ + { + "url": "last", + "locator": "lambda:(() => { try { return document.querySelector('.submission__link').textContent } catch { return '' } })()", + "required_contents": { + "exact_match": "WwWwWwWwWwwWw all the way" + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "__REDDIT__/submission_images/c876b20d6b3b643774e61d6a3c0914a9bc013118c9f10bdc9ff4e940a084f4b9.jpg" + } + ] + }, + "intent_template_id": 30, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "__REDDIT__/f/MechanicalKeyboards/14329" + }, + { + "sites": [ + "reddit" + ], + "task_id": 74, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + 
"start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find me {{description}} of {{object}}. Then, {{action}}.", + "intent": "Find me a keyboard version of what the man is wearing in the picture. Then, count how many white key is on it and post it in the comment section of that post with the following format: \"It has white keys!!\" (not including the double inverted commas), replacing with the actual number of white keys in the picture. Remember that partially occluded keycaps need to be counted also.", + "image": [ + "https://images.pexels.com/photos/2955376/pexels-photo-2955376.jpeg" + ], + "instantiation_dict": { + "description": "a keyboard version", + "object": "what the man is wearing in the picture", + "action": "count how many white key is on it and post it in the comment section of that post with the following format: \"It has white keys!!\" (not including the double inverted commas), replacing with the actual number of white keys in the picture. Remember that partially occluded keycaps need to be counted also" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/MechanicalKeyboards/56378", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "It has 38 white keys!! |OR| It has 39 white keys!!" + ] + } + } + ] + }, + "intent_template_id": 29, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__REDDIT__/f/MechanicalKeyboards/56378 -- there's a very tiny part of a key between the gap in the upper left, so accepting both 38 and 39." 
+ }, + { + "sites": [ + "reddit", + "shopping" + ], + "task_id": 75, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__SHOPPING__ |AND| __REDDIT__", + "geolocation": null, + "intent_template": "How should I {{task}}? Search {{forum}} for the answer and {{action}}.", + "intent": "How should I keep my keyboard clean while eating cheese balls and typing on keyboard at the same time? Search f/MechanicalKeyboards for the answer and buy me the tool on the shopping site.", + "image": [], + "instantiation_dict": { + "task": "keep my keyboard clean while eating cheese balls and typing on keyboard at the same time", + "forum": "f/MechanicalKeyboards", + "action": "buy me the tool on the shopping site" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:shopping_get_order_product_name_list(__page__)", + "required_contents": { + "must_include": [ + "Chopsticks" + ] + } + } + ] + }, + "intent_template_id": 31, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__REDDIT__/f/MechanicalKeyboards/120949, " + }, + { + "sites": [ + "reddit" + ], + "task_id": 76, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/funny/125907", + "geolocation": null, + "intent_template": "{{question}}? {{action}} and create a comment with the following format: {{comment_format}}.", + "intent": "How much money does the person have? 
Downvote the post and create a comment with the following format: \"This is a bill from .\" (not including the double inverted commas), replacing with the actual amount of money the person has and with the country that issue the bill.", + "image": [], + "instantiation_dict": { + "question": "How much money does the person have", + "action": "Downvote the post", + "comment_format": "\"This is a bill from .\" (not including the double inverted commas), replacing with the actual amount of money the person has and with the country that issue the bill" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "__REDDIT__/f/funny/125907", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/funny/125907", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "This is a 20000000000 bill from Zimbabwe. |OR| This is a 20,000,000,000 bill from Zimbabwe." 
+ ] + } + } + ] + }, + "intent_template_id": 32, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/funny/125907" + }, + { + "sites": [ + "reddit" + ], + "task_id": 77, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Navigate to {{object}} that {{description}}.", + "intent": "Navigate to the comments section of any post that contains Mona Lisa in the picture.", + "image": [], + "instantiation_dict": { + "object": "the comments section of any post", + "description": "contains Mona Lisa in the picture" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "reference_answers": null, + "reference_url": null, + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_vqa": [ + { + "question": "Is Mona Lisa in the picture? (yes/no)", + "answer": "yes" + } + ] + } + ] + }, + "intent_template_id": 33, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/pics/45680" + }, + { + "sites": [ + "reddit" + ], + "task_id": 78, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Navigate to {{object}} that {{description}}.", + "intent": "Navigate to the comments section of any post that contains a silver banana in the picture.", + "image": [], + "instantiation_dict": { + "object": "the comments section of any post", + "description": "contains a silver banana in the picture" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pics/25213", + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_vqa": [ + { + 
"question": "Is banana in the picture? (yes/no)", + "answer": "yes" + }, + { + "question": "Is the banana in the sliver? (yes/no)", + "answer": "yes" + } + ] + } + ] + }, + "intent_template_id": 33, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/pics/25213" + }, + { + "sites": [ + "reddit" + ], + "task_id": 79, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Navigate to {{object}} that {{description}}.", + "intent": "Navigate to the comments section of any post that contains Rick and Morty in the picture.", + "image": [], + "instantiation_dict": { + "object": "the comments section of any post", + "description": "contains Rick and Morty in the picture" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "reference_answers": null, + "reference_url": null, + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_vqa": [ + { + "question": "Is Rick and Morty in the picture? 
(yes/no)", + "answer": "yes" + } + ] + } + ] + }, + "intent_template_id": 33, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/mildlyinteresting/42152" + }, + { + "sites": [ + "reddit" + ], + "task_id": 80, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/mildlyinteresting", + "geolocation": null, + "intent_template": "Upvote all the posts that {{modifier}} on this page.", + "intent": "Upvote all the posts that have a picture which contains food on this page.", + "image": [], + "instantiation_dict": { + "modifier": "have a picture which contains food" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "__REDDIT__/f/mildlyinteresting/42188", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/mildlyinteresting/63435", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/mildlyinteresting/128010", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/mildlyinteresting/128086", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/mildlyinteresting/128184", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + 
"required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/mildlyinteresting/42138", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/mildlyinteresting/85669", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/mildlyinteresting/42237", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + } + ] + }, + "intent_template_id": 34, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "Note: Does not check every single post on the page. Picture 2 (42188), 4 (63435), 8 (128010), 24 (128086) has food." + }, + { + "sites": [ + "reddit" + ], + "task_id": 81, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/photoshopbattles", + "geolocation": null, + "intent_template": "Upvote all the posts that {{modifier}} on this page.", + "intent": "Upvote all the posts that have a picture which contains cat on this page.", + "image": [], + "instantiation_dict": { + "modifier": "have a picture which contains cat" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "__REDDIT__/f/photoshopbattles/45482", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/photoshopbattles/24822", + "locator": 
"document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/photoshopbattles/131315", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/photoshopbattles/89211", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/photoshopbattles/131323", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/photoshopbattles/24783", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/photoshopbattles/67033", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/photoshopbattles/66941", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + } + ] + }, + "intent_template_id": 34, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "Note: Does not check every single post on the page. Picture 10 (45482), 16 (24822), 18 (131315), 19 (89211) has cat." 
+ }, + { + "sites": [ + "reddit" + ], + "task_id": 82, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/gaming?next%5Branking%5D=2111&next%5Bid%5D=61422", + "geolocation": null, + "intent_template": "Upvote all the posts that {{modifier}} on this page.", + "intent": "Upvote all the posts that have a picture which contains Mario on this page.", + "image": [], + "instantiation_dict": { + "modifier": "have a picture which contains Mario" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "__REDDIT__/f/gaming/40345", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/gaming/19625", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/gaming/126151", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/gaming/126047", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/gaming/126131", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/gaming/126104", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" 
+ } + }, + { + "url": "__REDDIT__/f/gaming/19706", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/gaming/83745", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + } + ] + }, + "intent_template_id": 34, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "Note: Does not check every single post on the page. Picture 6 (40345), 8 (19625), 23 (126151), 25 (126047) has Mario." + }, + { + "sites": [ + "reddit" + ], + "task_id": 83, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/Art/active", + "geolocation": null, + "intent_template": "Upvote all the posts that {{modifier}} on this page.", + "intent": "Upvote all the posts that have a picture which contains a human or human-like face on this page.", + "image": [], + "instantiation_dict": { + "modifier": "have a picture which contains a human or human-like face" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "__REDDIT__/f/Art/116512", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/Art/116619", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/Art/116536", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + 
"exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/Art/116517", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/Art/116504", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/Art/116469", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/Art/116444", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/Art/116617", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/Art/116583", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/Art/116508", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/Art/116439", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/Art/116461", + "locator": 
"document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/Art/116595", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/Art/116610", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/Art/116476", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/Art/116538", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + } + ] + }, + "intent_template_id": 34, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "Note: Does not check every single post on the page. Picture 1 (116512), 2 (116619), 3 (116536), 4, 6 (116517), 9 (116504), 11 (116469), 15 (116444), 16 (116617), 17 (116583), 19 (116508), 21 (116439), 24 (116461) has human face." 
+ }, + { + "sites": [ + "reddit" + ], + "task_id": 84, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/consoles", + "geolocation": null, + "intent_template": "Downvote the posts {{modifier1}} and upvote the posts {{modifier2}} but {{action}}.", + "intent": "Downvote the posts in odd row and upvote the posts in even row but skip the one with a fox icon.", + "image": [], + "instantiation_dict": { + "modifier1": "in odd row", + "modifier2": "in even row", + "action": "skip the one with a fox icon" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "__REDDIT__/f/consoles/124522", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/consoles/124584", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/consoles/124590", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/consoles/81946", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/consoles/81967", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/consoles/124571", + "locator": 
"document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/consoles/124523", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-downvoted" + } + }, + { + "url": "__REDDIT__/f/consoles/81949", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-downvoted" + } + }, + { + "url": "__REDDIT__/f/consoles/124520", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-downvoted" + } + }, + { + "url": "__REDDIT__/f/consoles/124545", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-downvoted" + } + }, + { + "url": "__REDDIT__/f/consoles/103639", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-downvoted" + } + } + ] + }, + "intent_template_id": 35, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "Note: Does not check every single post on the page. Upvote: 2 (124522), 4 (124584), 6, 8, 10 (124590), 12, 16, 18 (81946), 20, 22 (81967), 24; Downvote: 1 (124523), 3, 5 (81949), 7, 9, 11 (124520), 13, 15, 17 (124545), 19, 21 (103639), 23, 25. Skip 14 (124571)." 
+ }, + { + "sites": [ + "reddit", + "wikipedia" + ], + "task_id": 85, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/EarthPorn?next%5Branking%5D=18106&next%5Bid%5D=98160 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "Consider {{modifier}}. Downvote the posts {{modifier1}} and upvote the posts {{modifier2}}.", + "intent": "Consider all posts with bodies of water in this page. Downvote the posts that are taken in northern hemisphere and upvote the posts that are taken in the southern hemisphere.", + "image": [], + "instantiation_dict": { + "modifier": "all posts with bodies of water in this page", + "modifier1": "that are taken in northern hemisphere", + "modifier2": "that are taken in the southern hemisphere" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "__REDDIT__/f/EarthPorn/98160", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-downvoted" + } + }, + { + "url": "__REDDIT__/f/EarthPorn/119215", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-downvoted" + } + }, + { + "url": "__REDDIT__/f/EarthPorn/119100", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-downvoted" + } + }, + { + "url": "__REDDIT__/f/EarthPorn/76382", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-downvoted" + } + }, + { + "url": 
"__REDDIT__/f/EarthPorn/76394", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-downvoted" + } + }, + { + "url": "__REDDIT__/f/EarthPorn/76351", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/EarthPorn/76405", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/EarthPorn/76416", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/EarthPorn/76387", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/EarthPorn/54431", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/EarthPorn/119220", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/EarthPorn/119108", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + } + ] + }, + "intent_template_id": 36, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "Note: Does not check every single post on the page. 
north: 1 (98160), 2, 6 (119215), 7, 8, 10, 11 (119100), 12 (76382), 13, 15, 18 (76394), 19, 20, 23; south: 4 (54431), 17 (119220), 21 (119108)" + }, + { + "sites": [ + "reddit" + ], + "task_id": 86, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/space", + "geolocation": null, + "intent_template": "Consider {{modifier}}. What is {{question}}?", + "intent": "Consider all post in this page. What is the username of the user that posted the picture where the moon takes up the largest proportion of the image?", + "image": [], + "instantiation_dict": { + "modifier": "all post in this page", + "question": "the username of the user that posted the picture where the moon takes up the largest proportion of the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "daryavaseum" + ] + }, + "reference_url": null + }, + "intent_template_id": 37, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "test scrolling. __REDDIT__/f/space/27421" + }, + { + "sites": [ + "reddit" + ], + "task_id": 87, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/aww", + "geolocation": null, + "intent_template": "Consider {{modifier}}. What is {{question}}?", + "intent": "Consider all post in this page. 
What is the total number of dogs shown?", + "image": [], + "instantiation_dict": { + "modifier": "all post in this page", + "question": "the total number of dogs shown" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "9" + ] + }, + "reference_url": null + }, + "intent_template_id": 37, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "test scrolling" + }, + { + "sites": [ + "reddit" + ], + "task_id": 88, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/headphones?next%5Branking%5D=514&next%5Bid%5D=126662", + "geolocation": null, + "intent_template": "Upvote all the posts that {{modifier}} on this page.", + "intent": "Upvote all the posts that do not contain headphones on this page.", + "image": [], + "instantiation_dict": { + "modifier": "do not contain headphones" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "__REDDIT__/f/headphones/105770", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/headphones/20186", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote vote--user-upvoted" + } + }, + { + "url": "__REDDIT__/f/headphones/126662", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/headphones/61993", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + 
"required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/headphones/105934", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + }, + { + "url": "__REDDIT__/f/headphones/84223", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": { + "exact_match": "vote" + } + } + ] + }, + "intent_template_id": 34, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "Note: Does not check every single post on the page. Upvote: 2, 3, 4 (105770), 8 (20186), 14, 18 (126736), 19 (20035), 22, 24 (105847). Ignores __REDDIT__/f/headphones/126736, __REDDIT__/f/headphones/20035, __REDDIT__/f/headphones/105847 for ambiguity (headphones vs. earphones). Test negative selection" + }, + { + "sites": [ + "reddit" + ], + "task_id": 89, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/space/112972", + "geolocation": null, + "intent_template": "{{question}}? Leave a comment with the following format: {{format}}.", + "intent": "How many Jupiter instances are in this picture? Leave a comment with the following format: \"It has Jupiter!!\" (not including the double inverted commas), replacing with the actual number of Jupiter in the picture. Remember to use digits.", + "image": [], + "instantiation_dict": { + "question": "How many Jupiter instances are in this picture", + "format": "\"It has Jupiter!!\" (not including the double inverted commas), replacing with the actual number of Jupiter in the picture. 
Remember to use digits" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "__REDDIT__/f/space/112972", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "It has 4 Jupiter!!" + } + } + ] + }, + "intent_template_id": 38, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "some basic tasks" + }, + { + "sites": [ + "reddit" + ], + "task_id": 90, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/mildlyinteresting/21720", + "geolocation": null, + "intent_template": "{{question}}? Leave a comment with the following format: {{format}}.", + "intent": "How many teeth does the pumpkin have? Leave a comment with the following format: \"It has teeth :)\" (not including the double inverted commas), replacing with the actual number of teeth the pumpkin has. Remember to use digits.", + "image": [], + "instantiation_dict": { + "question": "How many teeth does the pumpkin have", + "format": "\"It has teeth :)\" (not including the double inverted commas), replacing with the actual number of teeth the pumpkin has. 
Remember to use digits" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "__REDDIT__/f/mildlyinteresting/21720", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "It has 4 teeth :)" + } + } + ] + }, + "intent_template_id": 38, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "some basic tasks" + }, + { + "sites": [ + "reddit" + ], + "task_id": 91, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/mildlyinteresting/42168", + "geolocation": null, + "intent_template": "{{question}}? Leave a comment with the following format: {{format}}.", + "intent": "What is the color of topmost container? Leave a comment with the following format: \"Let's go \" (not including the double inverted commas), replacing with the actual color of the topmost container. Remember to use basic color names.", + "image": [], + "instantiation_dict": { + "question": "What is the color of topmost container", + "format": "\"Let's go \" (not including the double inverted commas), replacing with the actual color of the topmost container. 
Remember to use basic color names" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "__REDDIT__/f/mildlyinteresting/42168", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "Let's go green" + } + } + ] + }, + "intent_template_id": 38, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "some basic tasks" + }, + { + "sites": [ + "reddit" + ], + "task_id": 92, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "{{question}} the picture {{modifier}} in {{forum}}?", + "intent": "What is the animal in the picture from the most active post in f/aww?", + "image": [], + "instantiation_dict": { + "question": "What is the animal in", + "modifier": "from the most active post", + "forum": "f/aww" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "dog" + ] + } + }, + "intent_template_id": 39, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "some basic tasks" + }, + { + "sites": [ + "reddit" + ], + "task_id": 93, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "{{question}} the picture {{modifier}} in {{forum}}?", + "intent": "What is the color of the plate used for the picture from the most controversial post of all time in f/food?", + "image": [], + "instantiation_dict": { + "question": "What is the color of the plate used for", + "modifier": "from the most controversial post of all time", + "forum": "f/food" + }, + "require_reset": false, + "eval": { + "eval_types": [ + 
"string_match" + ], + "reference_answers": { + "must_include": [ + "white" + ] + } + }, + "intent_template_id": 39, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "some basic tasks. __REDDIT__/f/food/controversial?t=all" + }, + { + "sites": [ + "reddit" + ], + "task_id": 94, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "{{question}} the picture {{modifier}} in {{forum}}?", + "intent": "Is there snow in the picture from the hottest post in f/EarthPorn?", + "image": [], + "instantiation_dict": { + "question": "Is there snow in", + "modifier": "from the hottest post", + "forum": "f/EarthPorn" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "yes" + ] + } + }, + "intent_template_id": 39, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "some basic tasks" + }, + { + "sites": [ + "reddit" + ], + "task_id": 95, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "{{question}} the picture {{modifier}} in {{forum}}?", + "intent": "What is the color of the snow near the Chrismas tree in the picture from the second hottest post in f/newhaven?", + "image": [], + "instantiation_dict": { + "question": "What is the color of the snow near the Chrismas tree in", + "modifier": "from the second hottest post", + "forum": "f/newhaven" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "purple |OR| pink" + ] + } + }, + "intent_template_id": 39, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": 
"__REDDIT__/f/newhaven/64803/first-snowfall-of-the-season-on-the-green" + }, + { + "sites": [ + "reddit" + ], + "task_id": 96, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "{{question}} the picture {{modifier}} in {{forum}}?", + "intent": "Who authored the post shown in the picture (not the reddit author) of the most commented post of all time in f/Newark?", + "image": [], + "instantiation_dict": { + "question": "Who authored the post shown in", + "modifier": "(not the reddit author) of the most commented post of all time", + "forum": "f/Newark" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Councilman Michael Silva" + ] + } + }, + "intent_template_id": 39, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "some basic tasks" + }, + { + "sites": [ + "reddit" + ], + "task_id": 97, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "{{question}} the picture {{modifier}} in {{forum}}?", + "intent": "What is the color of the radio in the picture from the newest post in f/BuyItForLife?", + "image": [], + "instantiation_dict": { + "question": "What is the color of the radio in", + "modifier": "from the newest post", + "forum": "f/BuyItForLife" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "white" + ] + } + }, + "intent_template_id": 39, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "some basic tasks" + }, + { + "sites": [ + "reddit" + ], + "task_id": 98, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + 
"intent_template": "Navigate to the comments section of a post that {{condition}}.", + "intent": "Navigate to the comments section of a post that contains a picture of a dog.", + "image": [], + "instantiation_dict": { + "condition": "contains a picture of a dog" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_vqa": [ + { + "question": "Does the picture contain a dog? (yes/no)", + "answer": "yes" + } + ] + } + ] + }, + "intent_template_id": 40, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "just search \"dog picture\"" + }, + { + "sites": [ + "reddit" + ], + "task_id": 99, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Navigate to the comments section of a post that {{condition}}.", + "intent": "Navigate to the comments section of a post that contains a picture of a dog and a cat.", + "image": [], + "instantiation_dict": { + "condition": "contains a picture of a dog and a cat" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_vqa": [ + { + "question": "Does the picture contain a dog? (yes/no)", + "answer": "yes" + }, + { + "question": "Does the picture contain a cat? 
(yes/no)", + "answer": "yes" + } + ] + } + ] + }, + "intent_template_id": 40, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "just search \"dog cat picture\"" + }, + { + "sites": [ + "reddit" + ], + "task_id": 100, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Navigate to the comments section of a post that {{condition}}.", + "intent": "Navigate to the comments section of a post that contains a picture of a cake.", + "image": [], + "instantiation_dict": { + "condition": "contains a picture of a cake" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_vqa": [ + { + "question": "Does the picture contain a cake? (yes/no)", + "answer": "yes" + } + ] + } + ] + }, + "intent_template_id": 40, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "just search \"cake\"" + }, + { + "sites": [ + "reddit" + ], + "task_id": 101, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Navigate to the comments section of a post that {{condition}}.", + "intent": "Navigate to the comments section of a post that contains a picture of a keyboard.", + "image": [], + "instantiation_dict": { + "condition": "contains a picture of a keyboard" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_vqa": [ + { + "question": "Does the picture contain a keyboard? 
(yes/no)", + "answer": "yes" + } + ] + } + ] + }, + "intent_template_id": 40, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "just search \"keyboard\"" + }, + { + "sites": [ + "reddit" + ], + "task_id": 102, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/MechanicalKeyboards/56362", + "geolocation": null, + "intent_template": "{{question}}? Leave a comment replying to user \"{{user}}\" with the following format: {{format}}.", + "intent": "How many red keys are in the picture? Leave a comment replying to user \"liverblow\" with the following format: \"It has $%@\" (not including the double inverted commas), replacing with the actual number of red key. Remember to use digits.", + "image": [], + "instantiation_dict": { + "question": "How many red keys are in the picture", + "user": "liverblow", + "format": "\"It has $%@\" (not including the double inverted commas), replacing with the actual number of red key. 
Remember to use digits" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/MechanicalKeyboards/56362", + "locator": "func:reddit_get_parent_comment_username_of_latest_comment_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "liverblow" + } + }, + { + "url": "__REDDIT__/f/MechanicalKeyboards/56362", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "It has 3 $%@" + } + } + ] + }, + "intent_template_id": 41, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "reddit" + ], + "task_id": 103, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/memes/41674", + "geolocation": null, + "intent_template": "{{question}}? Leave a comment replying to user \"{{user}}\" with the following format: {{format}}.", + "intent": "What is the color of the mattress? Leave a comment replying to user \"BiAdventureTime\" with the following format: \"Its color is ~~~~``~~~~\" (not including the quote marks), replacing with the actual color of the mattress. Remember to use basic color names.", + "image": [], + "instantiation_dict": { + "question": "What is the color of the mattress", + "user": "BiAdventureTime", + "format": "\"Its color is ~~~~``~~~~\" (not including the quote marks), replacing with the actual color of the mattress. 
Remember to use basic color names" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/memes/41674", + "locator": "func:reddit_get_parent_comment_username_of_latest_comment_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "BiAdventureTime" + } + }, + { + "url": "__REDDIT__/f/MechanicalKeyboards/56362", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "Its color is blue ~~~~``~~~~" + } + } + ] + }, + "intent_template_id": 41, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "reddit" + ], + "task_id": 104, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/memes/21059", + "geolocation": null, + "intent_template": "{{question}}? Leave a comment replying to user \"{{user}}\" with the following format: {{format}}.", + "intent": "How many kirbies are in the picture? Leave a comment replying to user \"Valenyn\" with the following format: \"There are kirbies *\uff65\u309c\uff9f\uff65*:.\uff61..\uff61.:*\uff65'(*\uff9f\u25bd\uff9f*)'\uff65*:.\uff61. .\uff61.:*\uff65\u309c\uff9f\uff65*\" (not including the quote marks), replacing with the actual number of kirbies. Remember to use digits.", + "image": [], + "instantiation_dict": { + "question": "How many kirbies are in the picture", + "user": "Valenyn", + "format": "\"There are kirbies *\uff65\u309c\uff9f\uff65*:.\uff61..\uff61.:*\uff65'(*\uff9f\u25bd\uff9f*)'\uff65*:.\uff61. .\uff61.:*\uff65\u309c\uff9f\uff65*\" (not including the quote marks), replacing with the actual number of kirbies. 
Remember to use digits" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/memes/21059", + "locator": "func:reddit_get_parent_comment_username_of_latest_comment_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "Valenyn" + } + }, + { + "url": "__REDDIT__/f/MechanicalKeyboards/56362", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "There are 3 kirbies *\uff65\u309c\uff9f\uff65*:.\uff61..\uff61.:*\uff65'(*\uff9f\u25bd\uff9f*)'\uff65*:.\uff61. .\uff61.:*\uff65\u309c\uff9f\uff65*" + } + } + ] + }, + "intent_template_id": 41, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "reddit" + ], + "task_id": 105, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Subscribe to a forum that contains {{picture}}.", + "intent": "Subscribe to a forum that contains one of the following pictures.", + "image": [ + "__REDDIT__/submission_images/73dc8340f062f2514d681f058f166291f996308c853b0991b1d70aeec290bc3f.png", + "__REDDIT__/submission_images/c3e0f19b9c34fb663c059e6001b96c416d67f95e74ef923c3814effe734d1ffe.jpg" + ], + "instantiation_dict": { + "picture": "one of the following pictures" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__", + "locator": "func:get_query_text(__page__, '.sidebar')", + "required_contents": { + "must_include": [ + "gaming |OR| mildlyinteresting" + ] + } + } + ] + }, + "intent_template_id": 42, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/126091" + }, + { + "sites": 
[ + "reddit" + ], + "task_id": 106, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Subscribe to a forum that contains {{picture}}.", + "intent": "Subscribe to a forum that contains the following picture.", + "image": "__REDDIT__/submission_images/1479b251ab514f120dc5049645aebb24c1ffb22c6a7c358322f8f7e901caa330.png", + "instantiation_dict": { + "picture": "the following picture" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__", + "locator": "func:get_query_text(__page__, '.sidebar')", + "required_contents": { + "must_include": [ + "dataisbeautiful" + ] + } + } + ] + }, + "intent_template_id": 42, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/39053" + }, + { + "sites": [ + "reddit" + ], + "task_id": 107, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Subscribe to a forum that contains {{picture}}.", + "intent": "Subscribe to a forum that contains the following picture.", + "image": [ + "__REDDIT__/submission_images/c35e8ec31c5d6749de609413a87e2cacc00a5b5173fdd2c32bee563539c95349.jpg" + ], + "instantiation_dict": { + "picture": "the following picture" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__", + "locator": "func:get_query_text(__page__, '.sidebar')", + "required_contents": { + "must_include": [ + "iphone" + ] + } + } + ] + }, + "intent_template_id": 42, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/41010" + }, + { + "sites": [ + "reddit" + ], + "task_id": 108, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": 
"__REDDIT__", + "geolocation": null, + "intent_template": "Send a direct message to the user that created the post that contains the following picture.", + "intent": "Send a direct message to the user that created the post that contains the following picture.", + "image": [ + "__REDDIT__/submission_images/56770deac1bc216e20097184d3b5710708cc72cc3bb8ed00883066f2815020cd.jpg" + ], + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/messages", + "locator": "func:get_query_text(__page__, '.table')", + "required_contents": { + "must_include": [ + "J3RRYLIKESCHEESE" + ] + } + } + ] + }, + "intent_template_id": 43, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/27284" + }, + { + "sites": [ + "reddit" + ], + "task_id": 109, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Send a direct message to the user that created the post that contains the following picture.", + "intent": "Send a direct message to the user that created the post that contains the following picture.", + "image": [ + "__REDDIT__/submission_images/e44dc6ea011d1876ea96210b4aae47f8dc334dd975f53b6fa902535a0748bee4.jpg" + ], + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/messages", + "locator": "func:get_query_text(__page__, '.table')", + "required_contents": { + "must_include": [ + "Throwaway0114845892" + ] + } + } + ] + }, + "intent_template_id": 43, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/107539" + }, + { + "sites": [ + "reddit" + ], + "task_id": 110, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": 
"__REDDIT__", + "geolocation": null, + "intent_template": "Send a direct message to the user that created the post that contains the following picture.", + "intent": "Send a direct message to the user that created the post that contains the following picture.", + "image": [ + "__REDDIT__/submission_images/0e2afc6ce7d06b708f04ece3b0481d64aed4deab399c13e6eb9343525c730993.png" + ], + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/messages", + "locator": "func:get_query_text(__page__, '.table')", + "required_contents": { + "must_include": [ + "lostinam3rica" + ] + } + } + ] + }, + "intent_template_id": 43, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/38612" + }, + { + "sites": [ + "reddit" + ], + "task_id": 111, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a post for each of the following image in the most related forum.", + "intent": "Create a post for each of the following image in the most related forum.", + "image": [ + "https://images.pexels.com/photos/2539395/pexels-photo-2539395.jpeg", + "https://images.pexels.com/photos/16194823/pexels-photo-16194823/free-photo-of-people-in-front-of-quincy-market-in-usa.jpeg" + ], + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "reference_answers": null, + "page_image_query": [ + { + "eval_image_url": "__REDDIT__/f/Washington/top?t=day", + "eval_image_class": ".submission__title", + "eval_fuzzy_image_match": "https://images.pexels.com/photos/2539395/pexels-photo-2539395.jpeg", + "ssim_threshold": 0.6 + }, + { + "eval_image_url": "__REDDIT__/f/boston/top?t=day", + "eval_image_class": ".submission__title", + "eval_fuzzy_image_match": 
"https://images.pexels.com/photos/16194823/pexels-photo-16194823/free-photo-of-people-in-front-of-quincy-market-in-usa.jpeg", + "ssim_threshold": 0.6 + } + ] + }, + "intent_template_id": 44, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "/f/Washington, /f/boston" + }, + { + "sites": [ + "reddit" + ], + "task_id": 112, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a post for each of the following image in the most related forum.", + "intent": "Create a post for each of the following image in the most related forum.", + "image": [ + "https://images.pexels.com/photos/2150/sky-space-dark-galaxy.jpg", + "https://images.pexels.com/photos/1037992/pexels-photo-1037992.jpeg" + ], + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "reference_answers": null, + "page_image_query": [ + { + "eval_image_url": "__REDDIT__/f/space/top?t=day", + "eval_image_class": ".submission__title", + "eval_fuzzy_image_match": "https://images.pexels.com/photos/2150/sky-space-dark-galaxy.jpg", + "ssim_threshold": 0.6 + }, + { + "eval_image_url": "__REDDIT__/f/headphones/top?t=day", + "eval_image_class": ".submission__title", + "eval_fuzzy_image_match": "https://images.pexels.com/photos/1037992/pexels-photo-1037992.jpeg", + "ssim_threshold": 0.6 + } + ] + }, + "intent_template_id": 44, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "/f/space, /f/headphones" + }, + { + "sites": [ + "reddit" + ], + "task_id": 113, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a post for each of the following image in the most related forum.", + "intent": "Create a post for each of the following image in the most 
related forum.", + "image": [ + "https://images.pexels.com/photos/417074/pexels-photo-417074.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "https://images.pexels.com/photos/2706653/pexels-photo-2706653.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "https://images.pexels.com/photos/7947958/pexels-photo-7947958.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1" + ], + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "reference_answers": null, + "page_image_query": [ + { + "eval_image_url": "__REDDIT__/f/EarthPorn/top?t=day", + "eval_image_class": ".submission__title", + "eval_fuzzy_image_match": "https://images.pexels.com/photos/417074/pexels-photo-417074.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "ssim_threshold": 0.6 + }, + { + "eval_image_url": "__REDDIT__/f/nyc/top?t=day", + "eval_image_class": ".submission__title", + "eval_fuzzy_image_match": "https://images.pexels.com/photos/2706653/pexels-photo-2706653.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "ssim_threshold": 0.6 + }, + { + "eval_image_url": "__REDDIT__/f/dataisbeautiful/top?t=day", + "eval_image_class": ".submission__title", + "eval_fuzzy_image_match": "https://images.pexels.com/photos/7947958/pexels-photo-7947958.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "ssim_threshold": 0.6 + } + ] + }, + "intent_template_id": 44, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "/f/EarthPorn, /f/nyc, /f/dataisbeautiful" + }, + { + "sites": [ + "reddit" + ], + "task_id": 114, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Navigate to the {{status}} across {{forums}}. {{question}}?", + "intent": "Navigate to the second most active post with a picture across f/washingtondc, f/vermont and f/nyc. 
What is in the picture?", + "image": [], + "instantiation_dict": { + "status": "second most active post with a picture", + "forums": "f/washingtondc, f/vermont and f/nyc", + "question": "What is in the picture" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "string_match" + ], + "reference_answers": { + "must_include": [ + "firetruck |OR| fire engine" + ] + }, + "reference_url": "__REDDIT__/f/washingtondc/136747", + "url_note": "GOLD in PRED" + }, + "intent_template_id": 45, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "sol1: __REDDIT__/f/washingtondc+vermont+nyc/active?t=all; sol2: subscribe/unsubscribe to related forum and the result will show in homepage" + }, + { + "sites": [ + "reddit" + ], + "task_id": 115, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Navigate to the {{status}} across {{forums}}. {{question}}?", + "intent": "Navigate to the most controversial post of all time (with a picture) across f/Paterson, f/WaterburyCT and f/monitor. 
What operating system is the person using (windows/ubuntu/arch/macos)?", + "image": [], + "instantiation_dict": { + "status": "most controversial post of all time (with a picture)", + "forums": "f/Paterson, f/WaterburyCT and f/monitor", + "question": "What operating system is the person using (windows/ubuntu/arch/macos)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "string_match" + ], + "reference_answers": { + "must_include": [ + "windows" + ] + }, + "reference_url": "__REDDIT__/f/monitor/107539", + "url_note": "GOLD in PRED" + }, + "intent_template_id": 45, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "sol1: __REDDIT__/f/monitor+Paterson+WaterburyCT/controversial?t=all; sol2: subscribe/unsubscribe to related forum and the result will show in homepage" + }, + { + "sites": [ + "reddit" + ], + "task_id": 116, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Navigate to the {{status}} across {{forums}}. {{question}}?", + "intent": "Navigate to the most commented post of all time (with a picture) across f/aww and f/dataisbeautiful. 
Does the picture contain an animal (yes/no)?", + "image": [], + "instantiation_dict": { + "status": "most commented post of all time (with a picture)", + "forums": "f/aww and f/dataisbeautiful", + "question": "Does the picture contain an animal (yes/no)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "string_match" + ], + "reference_answers": { + "must_include": [ + "yes" + ] + }, + "reference_url": "__REDDIT__/f/aww/80903", + "url_note": "GOLD in PRED" + }, + "intent_template_id": 45, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "sol1: __REDDIT__/f/aww+dataisbeautiful/most_commented?t=all; sol2: subscribe/unsubscribe to related forum and the result will show in homepage" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 117, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/memes/85200 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What is the birth year of the {{character}}? Answer using the Wikipedia site in the second tab.", + "intent": "What is the birth year of the dog with pink hair? 
Answer using the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "character": "dog with pink hair" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2005" + ] + } + }, + "intent_template_id": 46, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Doge_(meme)" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 118, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/memes/106760 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What is the birth year of the {{character}}? Answer using the Wikipedia site in the second tab.", + "intent": "What is the birth year of the guy in red jacket? Answer using the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "character": "guy in red jacket" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1986" + ] + } + }, + "intent_template_id": 46, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Drake_(musician)" + }, + { + "sites": [ + "wikipedia", + "reddit" + ], + "task_id": 119, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/memes/62937 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing", + "geolocation": null, + "intent_template": "What is the birth year of the {{character}}? Answer using the Wikipedia site in the second tab.", + "intent": "What is the birth year of the guy holding the goblet? 
Answer using the Wikipedia site in the second tab.", + "image": [], + "instantiation_dict": { + "character": "guy holding the goblet" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1974" + ] + } + }, + "intent_template_id": 46, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Leonardo_DiCaprio" + }, + { + "sites": [ + "reddit" + ], + "task_id": 120, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/submission_images/361ec602ee63f8d052217fe6577e7163471ae1b86280acd498aa626a28239da2.jpg", + "geolocation": null, + "intent_template": "According to the image, {{question}}?", + "intent": "According to the image, which country in this picture has the worst rail system?", + "image": [], + "instantiation_dict": { + "question": "which country in this picture has the worst rail system" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "U.S. |OR| United States |OR| US" + ] + } + }, + "intent_template_id": 47, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/memes/106690" + }, + { + "sites": [ + "reddit" + ], + "task_id": 121, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "I'm doing some research about the post with this image. Navigate to the recipe posted by the OP in the comments.", + "intent": "I'm doing some research about the post with this image. 
Navigate to the recipe posted by the OP in the comments.", + "image": "__REDDIT__/submission_images/026347b13e015519ffdb759fdb368c33ba7ada80b66e4ff8947fcc9cd3c41433.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/food/125525/-/comment/2481750", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 48, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "__REDDIT__/f/food/125525/homemade-margherita-pizza" + }, + { + "sites": [ + "reddit" + ], + "task_id": 122, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "I'm doing some research about the post with this image. Navigate to the recipe posted by the OP in the comments.", + "intent": "I'm doing some research about the post with this image. Navigate to the recipe posted by the OP in the comments.", + "image": "__REDDIT__/submission_images/44269ef7e842aaa07cbe1de220cd59dd2161be3f445f6ca913da8fee650e438f.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/food/18967/-/comment/114718", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 48, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "__REDDIT__/f/food/18967/homemade-korean-fried-chicken" + }, + { + "sites": [ + "reddit" + ], + "task_id": 123, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find me a post in {{forum}} that illustrates the {{company}} revenue in {{datetime}}, and tell me the revenue of {{department}} (in billions).", + "intent": "Find me a post in 
/f/wallstreetbets that illustrates the Amazon revenue in Q3 FY22, and tell me the revenue of the online store in Q3 FY22 (in billions).", + "image": null, + "instantiation_dict": { + "forum": "/f/wallstreetbets", + "company": "Amazon", + "datetime": "Q3 FY22", + "department": "the online store in Q3 FY22" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "must_include": [ + "53.4" + ] + }, + "reference_url": "__REDDIT__/f/wallstreetbets/29340 |OR| __REDDIT__/f/wallstreetbets/29340/visual-breakdown-of-amzn-q3-fy22-income-statement" + }, + "intent_template_id": 49, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/wallstreetbets/29340" + }, + { + "sites": [ + "reddit" + ], + "task_id": 124, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find me a post in {{forum}} that illustrates the {{company}} revenue in {{datetime}}, and tell me the revenue of {{department}} (in billions).", + "intent": "Find me a post in /f/dataisbeautiful that illustrates the Microsoft revenue in 1985, and tell me the revenue of the whole company in 1985 (in billions).", + "image": null, + "instantiation_dict": { + "forum": "/f/dataisbeautiful", + "company": "Microsoft", + "datetime": "1985", + "department": "the whole company in 1985" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0.14" + ] + }, + "reference_url": "__REDDIT__/f/dataisbeautiful/18389 |OR| __REDDIT__/f/dataisbeautiful/18389/oc-microsoft-ipo-vs-now-breaking-down-its-revenue-and" + }, + "intent_template_id": 49, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/dataisbeautiful/18389" + }, + { + "sites": [ + "reddit" + ], 
+ "task_id": 125, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find me a post in {{forum}} that illustrates the {{company}} revenue in {{datetime}}, and tell me the revenue of {{department}} (in billions).", + "intent": "Find me a post in /f/dataisbeautiful that illustrates the Nvidia revenue in FY22, and tell me the revenue of the whole company in FY22 (in billions).", + "image": null, + "instantiation_dict": { + "forum": "/f/dataisbeautiful", + "company": "Nvidia", + "datetime": "FY22", + "department": "the whole company in FY22" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "must_include": [ + "27" + ] + }, + "reference_url": "__REDDIT__/f/dataisbeautiful/124787 |OR| __REDDIT__/f/dataisbeautiful/124787/oc-nvidia-s-fy22-income-statement" + }, + "intent_template_id": 49, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/dataisbeautiful/124787" + }, + { + "sites": [ + "reddit" + ], + "task_id": 126, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you navigate me to the most commented post by the author of this image? {{hint}}", + "intent": "Can you navigate me to the most commented post by the author of this image? I think it is in the /f/memes forum.", + "image": "__REDDIT__/submission_images/84eb6034d17330ca0f528293ed68c9106639a5e616c5d88b80503da22bf5ab24.jpg", + "instantiation_dict": { + "hint": "I think it is in the /f/memes forum." 
+ }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/photoshopbattles/24855 |OR| __REDDIT__/f/photoshopbattles/24855/psbattle-this-dog-and-her-pups", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 50, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "__REDDIT__/f/memes/85103" + }, + { + "sites": [ + "reddit" + ], + "task_id": 127, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you navigate me to the most commented post by the author of this image? {{hint}}", + "intent": "Can you navigate me to the most commented post by the author of this image? I think it is in the /f/memes forum.", + "image": "__REDDIT__/submission_images/7dff99a65aa39d065b1fb284182be5fd60403d42c5fcac48ac89ff04426cf2b3.png", + "instantiation_dict": { + "hint": "I think it is in the /f/memes forum." + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/memes/41719 |OR| __REDDIT__/f/memes/41719/where-my-6-4-gb", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 50, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "__REDDIT__/f/memes/41719/where-my-6-4-gb" + }, + { + "sites": [ + "reddit" + ], + "task_id": 128, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you navigate me to the most commented post by the author of this image? {{hint}}", + "intent": "Can you navigate me to the most commented post by the author of this image? 
I think it is in the /f/pics forum.", + "image": "__REDDIT__/submission_images/39d43f006044f9ec1642678ceaa0eb97cd45e4dc6132a34c8a3c7497f5b97670.jpg", + "instantiation_dict": { + "hint": "I think it is in the /f/pics forum." + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pics/25017 |OR| __REDDIT__/f/pics/25017/my-oil-painting-of-mcdonald-s-filet-o-fish", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 50, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "__REDDIT__/f/pics/45579" + }, + { + "sites": [ + "reddit" + ], + "task_id": 129, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Subscribe to the forum which is most likely to include discussions related to the image.", + "intent": "Subscribe to the forum which is most likely to include discussions related to the image.", + "image": "https://images.pexels.com/photos/671629/pexels-photo-671629.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__", + "locator": "document.querySelector('.sidebar').textContent", + "required_contents": { + "must_include": [ + "MechanicalKeyboards" + ] + } + } + ] + }, + "intent_template_id": 51, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "reddit" + ], + "task_id": 130, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Subscribe to the forum which is most likely to include discussions related to the image.", + "intent": "Subscribe to the forum which is most likely to include discussions related to the image.", + 
"image": "__REDDIT__/submission_images/e9e9da852e390b2f350dba55990f012352679d753034b3a3cb084f9eb9f9d44c.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__", + "locator": "document.querySelector('.sidebar').textContent", + "required_contents": { + "must_include": [ + "memes" + ] + } + } + ] + }, + "intent_template_id": 51, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "reddit" + ], + "task_id": 131, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Subscribe to the forum which is most likely to include discussions related to the image.", + "intent": "Subscribe to the forum which is most likely to include discussions related to the image.", + "image": "https://images.pexels.com/photos/6801874/pexels-photo-6801874.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__", + "locator": "document.querySelector('.sidebar').textContent", + "required_contents": { + "must_include": [ + "wallstreetbets |OR| personalfinance" + ] + } + } + ] + }, + "intent_template_id": 51, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "reddit" + ], + "task_id": 132, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "How many comments did the post with this image receive? I remember it being one of the top 50 hot posts of {{forum}}.", + "intent": "How many comments did the post with this image receive? 
I remember it being one of the top 50 hot posts of /f/nyc.", + "image": "__REDDIT__/submission_images/a6728cae79355e9d47e9f6531c5af8c2801ae976a7c62c73a3bd1711bb44ac17.jpg", + "instantiation_dict": { + "forum": "/f/nyc" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "146" + ] + } + }, + "intent_template_id": 52, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/nyc/88264" + }, + { + "sites": [ + "reddit" + ], + "task_id": 133, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "How many comments did the post with this image receive? I remember it being one of the top 50 hot posts of {{forum}}.", + "intent": "How many comments did the post with this image receive? I remember it being one of the top 50 hot posts of /f/ColumbiaMD.", + "image": "__REDDIT__/submission_images/16da5c8bc5b5a22446d5f8a8d10776ea9b99cb89a215f61d7fb806eb8b0cd28e.png", + "instantiation_dict": { + "forum": "/f/ColumbiaMD" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "23" + ] + } + }, + "intent_template_id": 52, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/ColumbiaMD/75767" + }, + { + "sites": [ + "reddit" + ], + "task_id": 134, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "How many comments did the post with this image receive? I remember it being one of the top 50 hot posts of {{forum}}.", + "intent": "How many comments did the post with this image receive? 
I remember it being one of the top 50 hot posts of /f/Maine.", + "image": "__REDDIT__/submission_images/3d8439cefc3104ad519424a4f5d7df87b67abfdbcbdd9151b826538d6438d5e2.jpg", + "instantiation_dict": { + "forum": "/f/Maine" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "29" + ] + } + }, + "intent_template_id": 52, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/Maine/120773" + }, + { + "sites": [ + "reddit" + ], + "task_id": 135, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you search for '{{term}}' and find the post with {{number}} upvotes and includes a meme of {{item}}?", + "intent": "Can you search for 'meme' and find the post with 2 upvotes and includes a meme of three Spider-Men, a flag of the Netherlands, and a flag of Croatia?", + "image": null, + "instantiation_dict": { + "term": "meme", + "number": "2", + "item": "three Spider-Men, a flag of the Netherlands, and a flag of Croatia" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/memes/63334 |OR| __REDDIT__/f/memes/63334/if-i-missed-any-flags-that-look-like-these-feel-free-to-add", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 53, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "__REDDIT__/f/memes/63334" + }, + { + "sites": [ + "reddit" + ], + "task_id": 136, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you navigate to the post with {{number}} upvotes, which includes a meme of {{item}}? 
{{hint}}", + "intent": "Can you navigate to the post with 307 upvotes, which includes a meme of a dog sitting in a room that is engulfed in flames and saying it is fine? I think this was posted in /f/wallstreetbets.", + "image": null, + "instantiation_dict": { + "hint": "I think this was posted in /f/wallstreetbets.", + "number": "307", + "item": "a dog sitting in a room that is engulfed in flames and saying it is fine" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/wallstreetbets/136131 |OR| __REDDIT__/f/wallstreetbets/136131/meirl", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 54, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "__REDDIT__/f/wallstreetbets/136131" + }, + { + "sites": [ + "reddit" + ], + "task_id": 137, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you navigate to the post with {{number}} upvotes, which includes a meme of {{item}}? {{hint}}", + "intent": "Can you navigate to the post with 128 upvotes, which includes a meme of Squidward watching SpongeBob and Patrick through a window? 
I think this was posted in /f/wallstreetbets.", + "image": null, + "instantiation_dict": { + "hint": "I think this was posted in /f/wallstreetbets.", + "number": "128", + "item": "Squidward watching SpongeBob and Patrick through a window" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/wallstreetbets/50264 |OR| __REDDIT__/f/wallstreetbets/50264/money-vacuum-go-shhloooop", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 54, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "__REDDIT__/f/wallstreetbets/50264" + }, + { + "sites": [ + "reddit" + ], + "task_id": 138, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Change my username to the first name of the recipient in the image.", + "intent": "Change my username to the first name of the recipient in the image.", + "image": "https://images.pexels.com/photos/6347512/pexels-photo-6347512.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__", + "locator": "func:get_query_text(__page__, '.site-nav')", + "required_contents": { + "must_include": [ + "patrick" + ] + } + } + ] + }, + "intent_template_id": 55, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "reddit" + ], + "task_id": 139, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "I {{interaction}}. Can you help me {{action}}?", + "intent": "I don't like the author of this image from one of the hot posts in /f/memes. 
Can you help me block them?", + "image": "__REDDIT__/submission_images/43a7a1ca5d08b16fcb11926a7bcc4b16476ed8e62a55bdd917b1020f2510f2e2.jpg", + "instantiation_dict": { + "interaction": "don't like the author of this image from one of the hot posts in /f/memes", + "action": "block them" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136/block_list", + "locator": "func:get_query_text(__page__, '.site-content')", + "required_contents": { + "must_include": [ + "Jamminmb" + ] + } + } + ] + }, + "intent_template_id": 56, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/memes/41741" + }, + { + "sites": [ + "reddit" + ], + "task_id": 140, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "I {{interaction}}. Can you help me {{action}}?", + "intent": "I would like to know the author of this image, from one of the top posts in /f/pics. 
Can you help me send them a dm with \"Hello\"?", + "image": "__REDDIT__/submission_images/6bfbf1280d28d84a9261695f0cac5a90addaaff1174807a2b381fdc159f3ed00.jpg", + "instantiation_dict": { + "interaction": "would like to know the author of this image, from one of the top posts in /f/pics", + "action": "send them a dm with \"Hello\"" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/messages", + "locator": "func:get_query_text(__page__, '.table')", + "required_contents": { + "must_include": [ + "o0_bobbo_0o", + "Hello" + ] + } + } + ] + }, + "intent_template_id": 56, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/pics/25046" + }, + { + "sites": [ + "reddit" + ], + "task_id": 141, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/movies/63905", + "geolocation": null, + "intent_template": "Leave a comment in this post with the release date of the movie in the image in mm-dd-yyyy format.", + "intent": "Leave a comment in this post with the release date of the movie in the image in mm-dd-yyyy format.", + "image": null, + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/f/movies/63905", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "08-11-2023" + ] + } + } + ] + }, + "intent_template_id": 57, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/movies/63905" + }, + { + "sites": [ + "reddit" + ], + "task_id": 142, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/movies/86107", + "geolocation": null, + "intent_template": "Leave a comment in this 
post with the release date of the movie in the image in mm-dd-yyyy format.", + "intent": "Leave a comment in this post with the release date of the movie in the image in mm-dd-yyyy format.", + "image": null, + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/f/movies/86107", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "03-03-2023" + ] + } + } + ] + }, + "intent_template_id": 57, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/movies/86107" + }, + { + "sites": [ + "reddit" + ], + "task_id": 143, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/movies/128615", + "geolocation": null, + "intent_template": "Leave a comment in this post with the release date of the movie in the image in mm-dd-yyyy format.", + "intent": "Leave a comment in this post with the release date of the movie in the image in mm-dd-yyyy format.", + "image": null, + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/f/movies/128615", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "03-31-2023" + ] + } + } + ] + }, + "intent_template_id": 57, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/movies/128615" + }, + { + "sites": [ + "reddit" + ], + "task_id": 144, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/wallstreetbets/72041", + "geolocation": null, + "intent_template": "Leave a comment in this post with the text as the trading loss of the OP in 
dollars.", + "intent": "Leave a comment in this post with the text as the trading loss of the OP in dollars.", + "image": null, + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/f/wallstreetbets/72041", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "31146.66 |OR| 31,146.66" + ] + } + } + ] + }, + "intent_template_id": 58, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/wallstreetbets/72041" + }, + { + "sites": [ + "reddit" + ], + "task_id": 145, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/wallstreetbets/136297", + "geolocation": null, + "intent_template": "Leave a comment in this post with the text as the trading loss of the OP in euros.", + "intent": "Leave a comment in this post with the text as the trading loss of the OP in euros.", + "image": null, + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/f/wallstreetbets/136297", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "41624.56 |OR| 41,624.56" + ] + } + } + ] + }, + "intent_template_id": 59, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/wallstreetbets/136297" + }, + { + "sites": [ + "reddit" + ], + "task_id": 146, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/wallstreetbets/50335", + "geolocation": null, + "intent_template": "Leave a comment in this post with the text as the trading loss of the OP in dollars.", + "intent": "Leave a comment in 
this post with the text as the trading loss of the OP in dollars.", + "image": null, + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/f/wallstreetbets/50335", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "209783.15 |OR| 209,783.15" + ] + } + } + ] + }, + "intent_template_id": 58, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/wallstreetbets/50335" + }, + { + "sites": [ + "reddit" + ], + "task_id": 147, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/food/18831", + "geolocation": null, + "intent_template": "Leave a comment in this post with the text as the number of {{item}} in the image.", + "intent": "Leave a comment in this post with the text as the number of buns in the image.", + "image": null, + "instantiation_dict": { + "item": "buns" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/f/food/18831", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "10 |OR| ten" + ] + } + } + ] + }, + "intent_template_id": 60, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/food/18831" + }, + { + "sites": [ + "reddit" + ], + "task_id": 148, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/food/60745", + "geolocation": null, + "intent_template": "Leave a comment in this post with the text as the number of {{item}} in the image.", + "intent": "Leave a comment in this post with the text as the number of arancinis in the image.", + "image": null, + 
"instantiation_dict": { + "item": "arancinis" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/f/food/60745", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "2 |OR| two" + ] + } + } + ] + }, + "intent_template_id": 60, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/food/60745" + }, + { + "sites": [ + "reddit" + ], + "task_id": 149, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/food/60641", + "geolocation": null, + "intent_template": "Leave a comment in this post with the text as the number of {{item}} in the image.", + "intent": "Leave a comment in this post with the text as the number of gyozas in the image.", + "image": null, + "instantiation_dict": { + "item": "gyozas" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/f/food/60641", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "8 |OR| eight" + ] + } + } + ] + }, + "intent_template_id": 60, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/food/60641" + }, + { + "sites": [ + "reddit" + ], + "task_id": 150, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/OldSchoolCool/35826", + "geolocation": null, + "intent_template": "Leave a comment in this post with the text as the number of {{item}} in the image.", + "intent": "Leave a comment in this post with the text as the number of adults in the image.", + "instantiation_dict": { + "item": "adults" + }, + "image": null, + "require_reset": false, + "eval": { + 
"eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/f/OldSchoolCool/35826", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "2 |OR| two" + ] + } + } + ] + }, + "intent_template_id": 60, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/OldSchoolCool/35826" + }, + { + "sites": [ + "reddit" + ], + "task_id": 151, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/OldSchoolCool/78885", + "geolocation": null, + "intent_template": "Leave a comment in this post with the text as the number of {{item}} in the image.", + "intent": "Leave a comment in this post with the text as the number of adults in the image.", + "instantiation_dict": { + "item": "adults" + }, + "image": null, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "program_html": [ + { + "url": "__REDDIT__/f/OldSchoolCool/78885", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "1 |OR| one" + ] + } + } + ] + }, + "intent_template_id": 60, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/OldSchoolCool/78885" + }, + { + "sites": [ + "reddit" + ], + "task_id": 152, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/OldSchoolCool/15059", + "geolocation": null, + "intent_template": "Leave a comment in this post with the text as the number of {{item}} in the image.", + "intent": "Leave a comment in this post with the text as the number of adults in the image.", + "instantiation_dict": { + "item": "adults" + }, + "image": null, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + 
"program_html": [ + { + "url": "__REDDIT__/f/OldSchoolCool/15059", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "must_include": [ + "0 |OR| zero" + ] + } + } + ] + }, + "intent_template_id": 60, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__REDDIT__/f/OldSchoolCool/15059" + }, + { + "sites": [ + "reddit" + ], + "task_id": 153, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Is there a version of this post with the final results? If so, navigate to the comment section for it.", + "intent": "Is there a version of this post with the final results? If so, navigate to the comment section for it.", + "image": "__REDDIT__/submission_images/a1ee91708d8c09894668f30cf64c963d05993a9dcda1a3f38107868475b3abb2.png", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/dataisbeautiful/60153/oc-radial-bracket-fifa-world-cup-2022-final-updated", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 61 + }, + { + "sites": [ + "reddit" + ], + "task_id": 154, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "{{intro}}. Can you tell me {{information}}?{{action}}", + "intent": "I think I saw a post in f/DataIsBeautiful that talked about the price of training ImageNet models. Can you tell me how much it cost in 2020 was to train a top-5 model with >93% accuracy? 
Finally, take me to the comments section of the post.", + "instantiation_dict": { + "intro": "I think I saw a post in f/DataIsBeautiful that talked about the price of training ImageNet models", + "information": "how much it cost in 2020 was to train a top-5 model with >93% accuracy", + "action": " Finally, take me to the comments section of the post." + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "must_include": [ + "7.43" + ] + }, + "reference_url": "__REDDIT__/f/dataisbeautiful/103920", + "url_note": "GOLD in PRED", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$7.43" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/dataisbeautiful/103920/oc-the-cost-of-training-ai-on-imagenet-has-decreased-from", + "intent_template_id": 62 + }, + { + "sites": [ + "reddit" + ], + "task_id": 155, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics", + "geolocation": null, + "intent_template": "{{intro}}. Can you navigate to the comments section of a post {{description}}?", + "intent": "I saw a really funny movie last week (the image is the movie poster). 
Can you navigate to the comments section of a post that has the picture that was the inspiration for the IRS scene?", + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Everything_Everywhere_All_at_Once.jpg.webp", + "instantiation_dict": { + "intro": "I saw a really funny movie last week (the image is the movie poster)", + "description": "that has the picture that was the inspiration for the IRS scene" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pics/131632", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 63 + }, + { + "sites": [ + "reddit" + ], + "task_id": 156, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "{{intro}}. Can you tell me {{information}}?{{action}}", + "intent": "I saw a fun Mario and Princess Peach cosplay in f/gaming. Can you tell me the top comment for that post?", + "instantiation_dict": { + "intro": "I saw a fun Mario and Princess Peach cosplay in f/gaming", + "information": "the top comment for that post", + "action": "" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Mama mia!" + ] + }, + "reference_url": "__REDDIT__/f/gaming/19625/finally-found-her-and-all-i-got-was-a-kiss-on-the-cheek", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Mama mia!" 
+ }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 62 + }, + { + "sites": [ + "reddit" + ], + "task_id": 157, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/Music", + "geolocation": null, + "intent_template": "Can you take me to the comments section of {{post}} {{condition}}?", + "intent": "Can you take me to the comments section of the post in this subreddit that talks about how the person who wrote this symphony died?", + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Eroica_Beethoven_title.jpg.webp", + "instantiation_dict": { + "post": "the post in this subreddit", + "condition": "that talks about how the person who wrote this symphony died" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/Music/121269/ludwig-von-beethoven-s-dna-reveals-he-probably-died-of-liver", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "Text is very hard to read, even for a human.", + "intent_template_id": 64 + }, + { + "sites": [ + "reddit" + ], + "task_id": 158, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you subscribe to all subreddits that start with the letter '{{letter}}' and have a {{object}} image in their top 3 posts of all time?", + "instantiation_dict": { + "letter": "a", + "object": "dog" + }, + "intent": "Can you subscribe to all subreddits that start with the letter 'a' and have a dog image in their top 3 posts of all time?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__", + 
"locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")", + "required_contents": { + "must_include": [ + "aww" + ], + "must_exclude": [ + "allentown", + "arlingtonva", + "Art", + "AskReddit", + "askscience" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 65 + }, + { + "sites": [ + "reddit" + ], + "task_id": 159, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you subscribe to all subreddits that start with the letter '{{letter}}' and have a {{object}} image in their top 3 posts of all time?", + "instantiation_dict": { + "letter": "n", + "object": "downtown" + }, + "intent": "Can you subscribe to all subreddits that start with the letter 'n' and have a downtown image in their top 3 posts of all time?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__", + "locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")", + "required_contents": { + "must_include": [ + "Newark", + "nyc" + ], + "must_exclude": [ + "newhampshire", + "newhaven", + "newjersey", + "news", + "nosleep", + "nottheonion" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 65 + }, + { + "sites": [ + "reddit" + ], + "task_id": 160, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you subscribe to all subreddits that start with the letter '{{letter}}' and have a {{object}} image in their top 3 posts of all time?", + "instantiation_dict": { + "letter": "i", + "object": "female usb to male lightning connector" + }, + "intent": "Can you 
subscribe to all subreddits that start with the letter 'i' and have a female usb to male lightning connector image in their top 3 posts of all time?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__", + "locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")", + "required_contents": { + "must_exclude": [ + "IAmA", + "InternetIsBeautiful", + "iphone" + ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 65 + }, + { + "sites": [ + "reddit" + ], + "task_id": 161, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/Jokes", + "geolocation": null, + "intent_template": "Take me to the post for the joke on this page that is most likely to take place in the following image?", + "intent": "Take me to the post for the joke on this page that is most likely to take place in the following image?", + "image": "https://images.pexels.com/photos/4969841/pexels-photo-4969841.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/Jokes/55249/a-priest-and-a-nun-in-a-desert-cabin", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "The post mispells 'deserted' as 'desert' which makes it hard to find.", + "intent_template_id": 66 + }, + { + "sites": [ + "reddit" + ], + "task_id": 162, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you give me the URL for a gif I could send to my friend about the contrast of investing in 
a retirement account vs a brokerage account from the hot posts of /f/wallstreetbets?", + "intent": "Can you give me the URL for a gif I could send to my friend about the contrast of investing in a retirement account vs a brokerage account from the hot posts of /f/wallstreetbets?", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "__REDDIT__/submission_images/73199932fc7c826edb8313f0993483b6982ea7d3ea368e8d904eab71cb642e2e.gif" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__REDDIT__/submission_images/73199932fc7c826edb8313f0993483b6982ea7d3ea368e8d904eab71cb642e2e.gif" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 67 + }, + { + "sites": [ + "reddit", + "wikipedia" + ], + "task_id": 163, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__ |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Up_(2009_film)", + "geolocation": null, + "intent_template": "What is the top comment for the most popular post of all time (across all forums) containing {{subject}} in the Wikipedia tab?", + "instantiation_dict": { + "subject": "a character from the movie" + }, + "intent": "What is the top comment for the most popular post of all time (across all forums) containing a character from the movie in the Wikipedia tab?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "https://i.imgur.com/zmkfQ8F" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "https://i.imgur.com/zmkfQ8F" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "Post is 
__REDDIT__/f/photoshopbattles/45340/psbattle-halloween-costume", + "intent_template_id": 68 + }, + { + "sites": [ + "reddit", + "wikipedia" + ], + "task_id": 164, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__ |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Digging", + "geolocation": null, + "intent_template": "What is the top comment for the most popular post of all time (across all forums) containing {{subject}} in the Wikipedia tab?", + "instantiation_dict": { + "subject": "a dog trying to perform the action" + }, + "intent": "What is the top comment for the most popular post of all time (across all forums) containing a dog trying to perform the action in the Wikipedia tab?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "I\u2019ve found that Dachshunds are rarely accused of having small or light personalities." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "I\u2019ve found that Dachshunds are rarely accused of having small or light personalities." + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "Post is __REDDIT__/f/gifs/19936/ok-time-for-you-to-go-to-bed hard visual because the agent needs to understand a moving gif", + "intent_template_id": 68 + }, + { + "sites": [ + "reddit" + ], + "task_id": 165, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you find {{post}} and message the poster '{{message}}'?", + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Fallingwater3.jpg.webp", + "instantiation_dict": { + "post": "art of the building in the picture", + "message": "I really like this picture! Can I print it out and hang it on my wall?" 
+ }, + "intent": "Can you find art of the building in the picture and message the poster 'I really like this picture! Can I print it out and hang it on my wall?'?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/messages", + "locator": "func:get_query_text(__page__, \"#main > table > tbody\")", + "required_contents": { + "must_include": [ + "put_the_ux_in_sux", + "I really like this picture! Can I print it out and hang it on my wall?" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/pittsburgh/89846/fallingwater-by-lisa-frank-lloyd-wright", + "intent_template_id": 69 + }, + { + "sites": [ + "reddit" + ], + "task_id": 166, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you find {{post}} and message the poster '{{message}}'?", + "instantiation_dict": { + "post": "a post of the International Space Station's solar panels with the Earth in the background", + "message": "Was the sun super bright in space?" + }, + "intent": "Can you find a post of the International Space Station's solar panels with the Earth in the background and message the poster 'Was the sun super bright in space?'?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/messages", + "locator": "func:get_query_text(__page__, \"#main > table > tbody\")", + "required_contents": { + "must_include": [ + "astro_pettit |OR| Hazelil", + "Was the sun super bright in space?" 
+ ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__REDDIT__/f/space/27376/a-view-i-captured-of-the-sun-between-iss-solar-panels-more |OR| __REDDIT__/f/space/92045/ireland-seen-from-the-international-space-station", + "intent_template_id": 69 + }, + { + "sites": [ + "reddit" + ], + "task_id": 167, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "{{intro}}. Can you navigate to the comments section of a post {{description}}?", + "instantiation_dict": { + "intro": "I am feeling sad today", + "description": "that has an image of a dog and a cat to cheer me up" + }, + "intent": "I am feeling sad today. Can you navigate to the comments section of a post that has an image of a dog and a cat to cheer me up?", + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__row", + "eval_vqa": [ + { + "question": "Is there a cat in this photo? (yes/no)", + "answer": "yes" + }, + { + "question": "Is there a dog in this photo? 
(yes/no)", + "answer": "yes" + } + ] + } + ], + "reference_url": "", + "program_html": [], + "string_note": "" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "An example is at __REDDIT__/f/aww/81028/cute-buddies", + "intent_template_id": 63 + }, + { + "sites": [ + "reddit" + ], + "task_id": 168, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics?next%5Branking%5D=1773&next%5Bid%5D=67091", + "geolocation": null, + "intent_template": "Can you repost the image on this page that would fit best in {{subreddit}} with the same title and text as the original post and take me to the comments section of the post?", + "instantiation_dict": { + "subreddit": "f/EarthPorn" + }, + "intent": "Can you repost the image on this page that would fit best in f/EarthPorn with the same title and text as the original post and take me to the comments section of the post?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")", + "required_contents": { + "must_include": [ + "Aurora Borealis looking in the Direction to Svalbard flying from Lahore to Toronto (my pic) \ud83c\udf42\ud83d\udcab\u2728", + "MarvelsGrantMan136", + "EarthPorn" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "__REDDIT__/submission_images/1972dfbb29553ecc270ae83ab679d218f425d478136f1f524a09047e299a2cef.jpg" + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 70 + }, + { + "sites": [ + "reddit" + ], + "task_id": 169, + "require_login": true, + 
"storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics/top?t=all&next%5BnetScore%5D=589&next%5Bid%5D=67196", + "geolocation": null, + "intent_template": "Can you repost the image on this page that would fit best in {{subreddit}} with the same title and text as the original post and take me to the comments section of the post?", + "instantiation_dict": { + "subreddit": "f/sports" + }, + "intent": "Can you repost the image on this page that would fit best in f/sports with the same title and text as the original post and take me to the comments section of the post?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")", + "required_contents": { + "must_include": [ + "Pele, Ali, Jordan, Montana, Brown, Kareem, Evert, King, Nicklaus, Russell, Elway and Holyfield", + "MarvelsGrantMan136", + "sports" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "__REDDIT__/submission_images/539a1a709c696de5b7c3bb8d8df51fd2b5cff90b91de30da1e6e13f88b6d71a6.jpg" + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 70 + }, + { + "sites": [ + "reddit" + ], + "task_id": 170, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you subscribe to the forum {{condition}}?", + "image": "https://images.pexels.com/photos/1884574/pexels-photo-1884574.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "condition": "that is most likely to contain posts like this" + }, + "intent": "Can you subscribe to the forum that 
is most likely to contain posts like this?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__", + "locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")", + "required_contents": { + "must_include": [ + "sports" + ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 71 + }, + { + "sites": [ + "reddit" + ], + "task_id": 171, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you subscribe to the forum {{condition}}?", + "image": "https://images.pexels.com/photos/2115257/pexels-photo-2115257.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "condition": "that is most likely to contain posts like this" + }, + "intent": "Can you subscribe to the forum that is most likely to contain posts like this?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__", + "locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")", + "required_contents": { + "must_include": [ + "MechanicalKeyboards" + ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 71 + }, + { + "sites": [ + "reddit" + ], + "task_id": 172, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/jerseycity", + "geolocation": null, + "intent_template": "Can you post a comment on the post {{description}} with the text '{{comment}}'?", + "instantiation_dict": { + "description": "that has a crispy chicken sandwhich", + "comment": "I love those things!" 
+ }, + "intent": "Can you post a comment on the post that has a crispy chicken sandwhich with the text 'I love those things!'?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/jerseycity/62526/because-all-we-have-in-life-is-our-word-and-fake-internet", + "locator": "func:get_query_text(__page__, \"#main\")", + "required_contents": { + "must_include": [ + "MarvelsGrantMan136", + "I love those things!" + ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "Occulded text is hard to read.", + "intent_template_id": 72 + }, + { + "sites": [ + "reddit" + ], + "task_id": 173, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/memes/top?t=all", + "geolocation": null, + "intent_template": "Can you post a comment on the post {{description}} with the text '{{comment}}'?", + "instantiation_dict": { + "description": "with an \ud83d\udc7d in it", + "comment": "Scary!" + }, + "intent": "Can you post a comment on the post with an \ud83d\udc7d in it with the text 'Scary!'?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/memes/21032/at-lease-is-not-brazil", + "locator": "func:get_query_text(__page__, \"#main\")", + "required_contents": { + "must_include": [ + "MarvelsGrantMan136", + "Scary!" 
+ ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 72 + }, + { + "sites": [ + "reddit" + ], + "task_id": 174, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/gifs", + "geolocation": null, + "intent_template": "Can you post a comment on the post {{description}} with the text '{{comment}}'?", + "instantiation_dict": { + "description": "with a \ud83d\udef9 in it", + "comment": "\ud83c\udfce\ufe0f\ud83c\udfce\ufe0f\ud83c\udfce\ufe0f" + }, + "intent": "Can you post a comment on the post with a \ud83d\udef9 in it with the text '\ud83c\udfce\ufe0f\ud83c\udfce\ufe0f\ud83c\udfce\ufe0f'?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/gifs/19938/what-goes-around-comes-around", + "locator": "func:get_query_text(__page__, \"#main\")", + "required_contents": { + "must_include": [ + "MarvelsGrantMan136", + "\ud83c\udfce\ufe0f\ud83c\udfce\ufe0f\ud83c\udfce\ufe0f" + ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "Needs to see animated gif to complete task.", + "intent_template_id": 72 + }, + { + "sites": [ + "reddit" + ], + "task_id": 175, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "How much money was user {{user}}'s {{security}} worth {{timeframe}} in their screenshot (in {{currency}})?", + "instantiation_dict": { + "user": "cezapiza", + "security": "SPY puts", + "timeframe": "the day before", + "currency": "USD" + }, + "intent": "How much money was user cezapiza's SPY puts worth the day before in their screenshot (in USD)?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + 
], + "reference_answers": { + "must_include": [ + "360" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$360" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "Post is __REDDIT__/f/wallstreetbets/115267/up-60-on-my-spy-puts-today", + "intent_template_id": 73 + }, + { + "sites": [ + "reddit" + ], + "task_id": 176, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/food", + "geolocation": null, + "intent_template": "Can you comment on the food from the country in the image and about {{description}} saying '{{comment}}'?", + "instantiation_dict": { + "description": "\ud83c\udf5c", + "comment": "Sooo tasty!" + }, + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Extreme_points_japan_map.png.webp", + "intent": "Can you comment on the food from the country in the image and about \ud83c\udf5c saying 'Sooo tasty!'?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/food/104562/i-ate-tonkotsu-ramen", + "locator": "func:get_query_text(__page__, \"#main\")", + "required_contents": { + "must_include": [ + "MarvelsGrantMan136", + "Sooo tasty!" 
+ ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 74 + }, + { + "sites": [ + "reddit" + ], + "task_id": 177, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/food?next%5Branking%5D=1727&next%5Bid%5D=82778", + "geolocation": null, + "intent_template": "Can you comment on the food from the country in the image and about {{description}} saying '{{comment}}'?", + "instantiation_dict": { + "description": "a dog in the background", + "comment": "Someone is looking for a bite \ud83d\udc36" + }, + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/South_Korea_(orthographic_projection).svg.png.webp", + "intent": "Can you comment on the food from the country in the image and about a dog in the background saying 'Someone is looking for a bite \ud83d\udc36'?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/food/82838/tteukboki-homemade", + "locator": "func:get_query_text(__page__, \"#main\")", + "required_contents": { + "must_include": [ + "MarvelsGrantMan136", + "Someone is looking for a bite \ud83d\udc36" + ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 74 + }, + { + "sites": [ + "reddit" + ], + "task_id": 178, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you subscribe to the forum {{condition}}?", + "image": "https://images.pexels.com/photos/7435463/pexels-photo-7435463.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "condition": "for this city" + }, + "intent": "Can you subscribe to the forum for this city?", + "require_reset": 
true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__", + "locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")", + "required_contents": { + "must_include": [ + "nyc" + ] + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "The image is of the 'Vessel' in New York City.", + "intent_template_id": 71 + }, + { + "sites": [ + "reddit" + ], + "task_id": 179, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you take me to {{sorting}} in the forum for {{location}}?", + "instantiation_dict": { + "sorting": "the page that shows the most controversial posts of the past month", + "location": "a city located in the state pictured in the image" + }, + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Map_of_USA_MO.svg.png.webp", + "intent": "Can you take me to the page that shows the most controversial posts of the past month in the forum for a city located in the state pictured in the image?", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/springfieldMO/controversial?t=month", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 75 + }, + { + "sites": [ + "reddit" + ], + "task_id": 180, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you take me to {{sorting}} in the forum for {{location}}?", + "instantiation_dict": { + "sorting": "the newest posts", + "location": "the highlighted county in the image" + }, + "image": 
"__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Map_of_Virginia_highlighting_Arlington_County.svg.png.webp", + "intent": "Can you take me to the newest posts in the forum for the highlighted county in the image?", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/arlingtonva/new", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 75 + }, + { + "sites": [ + "reddit" + ], + "task_id": 181, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "I am a student at the school in the image. Can you make a post asking for the best {{location}} in the forum for the city my school is in and navigate to it?", + "instantiation_dict": { + "location": "pizza" + }, + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Yale_family_chrest.jpg.webp", + "intent": "I am a student at the school in the image. 
Can you make a post asking for the best pizza in the forum for the city my school is in and navigate to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")", + "required_contents": { + "must_include": [ + "pizza", + "newhaven", + "MarvelsGrantMan136" + ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "The text in the image looks like 'Vale' but the agent needs to know it's Yale.", + "intent_template_id": 76 + }, + { + "sites": [ + "reddit" + ], + "task_id": 182, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "I am a student at the school in the image. Can you make a post asking for the best {{location}} in the forum for the city my school is in and navigate to it?", + "instantiation_dict": { + "location": "Indian food" + }, + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Stanstadium_view.jpg.webp", + "intent": "I am a student at the school in the image. Can you make a post asking for the best Indian food in the forum for the city my school is in and navigate to it?", + "require_reset": true, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "string_note": "There is no forum for Palo Alto, where Stanford is located.", + "reference_url": "" + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "Hard to read text off the football field and has to recognize that StamfordCT is not where Stanford is located. 
Also there is no forum for Palo Alto.", + "intent_template_id": 76 + }, + { + "sites": [ + "reddit", + "shopping" + ], + "task_id": 183, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__ |AND| __SHOPPING__/grocery-gourmet-food/fresh-meal-kits.html?p=5&product_list_order=name", + "geolocation": null, + "intent_template": "Can you make a post in f/food, title it '{{title}}', add the full sized version of the image with {{food}}, and navigate to it?", + "instantiation_dict": { + "title": "[I ate] this", + "food": "the pop tart on this page of OneStopMarket" + }, + "intent": "Can you make a post in f/food, title it '[I ate] this', add the full sized version of the image with the pop tart on this page of OneStopMarket, and navigate to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")", + "required_contents": { + "must_include": [ + "[I ate] this", + "MarvelsGrantMan136", + "food" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B08JQRPJBR.0.jpg" + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 77 + }, + { + "sites": [ + "reddit", + "shopping" + ], + "task_id": 184, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__ |AND| __SHOPPING__/grocery-gourmet-food/dairy-cheese-eggs/cheese.html?p=4&product_list_order=name", + "geolocation": null, + "intent_template": "Can you make a post in f/food, title it '{{title}}', add the full sized version of the image 
with {{food}}, and navigate to it?", + "instantiation_dict": { + "title": "[HOMEMADE] this yummy cheese", + "food": "the semicircle cheese" + }, + "intent": "Can you make a post in f/food, title it '[HOMEMADE] this yummy cheese', add the full sized version of the image with the semicircle cheese, and navigate to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")", + "required_contents": { + "must_include": [ + "[HOMEMADE] this yummy cheese", + "MarvelsGrantMan136", + "food" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B009P9HODS.0.jpg |OR| __SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B009P9HODS.1.jpg" + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 77 + }, + { + "sites": [ + "reddit", + "shopping", + "wikipedia" + ], + "task_id": 185, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__ |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Pok\u00e9mon |AND| __SHOPPING__/video-games/legacy-systems/playstation-systems.html?p=3", + "geolocation": null, + "intent_template": "Can you make a post in {{forum}} titled '{{title}}', add the full sized version of the image on the shopping page most related to the wikipedia page? Finally, navigate to the comments section of it.", + "instantiation_dict": { + "forum": "the /f/gaming forum", + "title": "Should I buy this? I can get a 25% discount" + }, + "intent": "Can you make a post in the /f/gaming forum titled 'Should I buy this? 
I can get a 25% discount', add the full sized version of the image on the shopping page most related to the wikipedia page? Finally, navigate to the comments section of it.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")", + "required_contents": { + "must_include": [ + "Should I buy this? I can get a 25% discount", + "MarvelsGrantMan136", + "gaming" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07CMWR38H.0.jpg" + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 78 + }, + { + "sites": [ + "reddit", + "shopping", + "wikipedia" + ], + "task_id": 186, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__ |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Rudolph_the_Red-Nosed_Reindeer%3A_The_Movie |AND| __SHOPPING__/sports-outdoors/sports.html?p=5", + "geolocation": null, + "intent_template": "Can you make a post in {{forum}} titled '{{title}}', add the full sized version of the image on the shopping page most related to the wikipedia page? Finally, navigate to the comments section of it.", + "instantiation_dict": { + "forum": "the /f/aww forum", + "title": "Perfect for the winter holidays!" + }, + "intent": "Can you make a post in the /f/aww forum titled 'Perfect for the winter holidays!', add the full sized version of the image on the shopping page most related to the wikipedia page? 
Finally, navigate to the comments section of it.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")", + "required_contents": { + "must_include": [ + "Perfect for the winter holidays!", + "MarvelsGrantMan136", + "aww" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09KVCRK1S.0.jpg |OR| __SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09KVCRK1S.1.jpg" + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 78 + }, + { + "sites": [ + "reddit", + "shopping", + "wikipedia" + ], + "task_id": 187, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__ |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Yoda |AND| __SHOPPING__/home-kitchen/bedding/kids-bedding.html?product_list_order=name&product_list_dir=desc", + "geolocation": null, + "intent_template": "Can you make a post in {{forum}} titled '{{title}}', add the full sized version of the image on the shopping page most related to the wikipedia page? Finally, navigate to the comments section of it.", + "instantiation_dict": { + "forum": "the forum for the capital of Rhode Island (not the one for the state)", + "title": "Perfect for the cold weather" + }, + "intent": "Can you make a post in the forum for the capital of Rhode Island (not the one for the state) titled 'Perfect for the cold weather', add the full sized version of the image on the shopping page most related to the wikipedia page? 
Finally, navigate to the comments section of it.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")", + "required_contents": { + "must_include": [ + "Perfect for the cold weather", + "MarvelsGrantMan136", + "providence" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09LQ95N1N.0.jpg |OR| __SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09LQ95N1N.1.jpg" + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 78 + }, + { + "sites": [ + "reddit" + ], + "task_id": 188, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you subscribe me to the most suitable forum where I can find more posts like in the image?", + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Apple_iPhone_12_Pro_-_Cameras_(50535314721).jpg.webp", + "instantiation_dict": {}, + "intent": "Can you subscribe me to the most suitable forum where I can find more posts like in the image?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__", + "locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")", + "required_contents": { + "must_include": [ + "iphone |OR| technology" + ] + } + } + ] + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + 
"intent_template_id": 79 + }, + { + "sites": [ + "reddit" + ], + "task_id": 189, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you subscribe me to the most suitable forum where I can find more posts like in the image?", + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Typical_cnn.png.webp", + "instantiation_dict": {}, + "intent": "Can you subscribe me to the most suitable forum where I can find more posts like in the image?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__", + "locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")", + "required_contents": { + "must_include": [ + "MachineLearning |OR| deeplearning |OR| singularity" + ] + } + } + ] + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 79 + }, + { + "sites": [ + "reddit" + ], + "task_id": 190, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you subscribe me to the most suitable forum where I can find more posts like in the image?", + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Pieter_Aertsen_-_Market_Scene_-_Google_Art_ProjectFXD.jpg.webp", + "instantiation_dict": {}, + "intent": "Can you subscribe me to the most suitable forum where I can find more posts like in the image?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__", + "locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")", + "required_contents": { + "must_include": [ + "Art" + ] + } + } + ] + }, + "reasoning_difficulty": "medium", + 
"visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 79 + }, + { + "sites": [ + "reddit" + ], + "task_id": 191, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/iphone/top?t=all", + "geolocation": null, + "intent_template": "What is the sum of the number of upvotes for the posts on this page that have {{item}}?", + "instantiation_dict": { + "item": "any part of a physical phone" + }, + "intent": "What is the sum of the number of upvotes for the posts on this page that have any part of a physical phone?", + "require_reset": true, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "3463 |OR| 3,463" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3463" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "1) __REDDIT__/f/iphone/106148/apple-really-should-bring-back-the-rose-gold-color-option-it 2) __REDDIT__/f/iphone/41010/strong-magnets 3) __REDDIT__/f/iphone/62333/this-mesh-thing-fell-out-of-my-iphone-13-pro-is-it-a-big", + "intent_template_id": 80 + }, + { + "sites": [ + "reddit" + ], + "task_id": 192, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/aww/most_commented?t=all", + "geolocation": null, + "intent_template": "What is the sum of the number of upvotes for the posts on this page that have {{item}}?", + "instantiation_dict": { + "item": "three animals" + }, + "intent": "What is the sum of the number of upvotes for the posts on this page that have three animals?", + "require_reset": true, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "3566 |OR| 3,566" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3566" + }, + 
"reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "1) __REDDIT__/f/aww/80942/oc-what-do-you-suspect-they-want 2) __REDDIT__/f/aww/102762/they-think-the-couch-is-theirs-oc", + "intent_template_id": 80 + }, + { + "sites": [ + "reddit" + ], + "task_id": 193, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/EarthPorn/top?t=all", + "geolocation": null, + "intent_template": "Can you comment '{{comment}}' on the post with {{item}} posted {{timeframe}}?", + "instantiation_dict": { + "comment": "I love this!", + "item": "a blue/turquoise aurora", + "timeframe": "in the fall" + }, + "intent": "Can you comment 'I love this!' on the post with a blue/turquoise aurora posted in the fall?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/EarthPorn/12673/image-of-northern-lights-i-shot-in-senja-norway-2-weeks-ag", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "I love this!" + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 81 + }, + { + "sites": [ + "reddit" + ], + "task_id": 194, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/MechanicalKeyboards/most_commented?t=all", + "geolocation": null, + "intent_template": "Can you comment '{{comment}}' on the post with {{item}} posted {{timeframe}}?", + "instantiation_dict": { + "comment": "This looks really fun to type on!", + "item": "a single keyboard with a number pad", + "timeframe": "on Halloween (in UTC)" + }, + "intent": "Can you comment 'This looks really fun to type on!' 
on the post with a single keyboard with a number pad posted on Halloween (in UTC)?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/MechanicalKeyboards/14466/any-thoughts-on-layout-idea", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "This looks really fun to type on!" + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "Has to look at a lot of images, and some have unexpected layouts.", + "intent_template_id": 81 + }, + { + "sites": [ + "reddit" + ], + "task_id": 195, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/MechanicalKeyboards/top?t=all&next%5BnetScore%5D=2415&next%5Bid%5D=78388", + "geolocation": null, + "intent_template": "Can you comment '{{comment}}' on the post with {{item}} posted {{timeframe}}?", + "instantiation_dict": { + "comment": "Wish I had this for my setup", + "item": "a pink escape key", + "timeframe": "after 8pm UTC" + }, + "intent": "Can you comment 'Wish I had this for my setup' on the post with a pink escape key posted after 8pm UTC?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/MechanicalKeyboards/120858/literally-found-this-guy-in-a-giant-ewaste-pile-at-my", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "Wish I had this for my setup" + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 81 + }, + { + "sites": [ + "reddit" + ], + "task_id": 196, + 
"require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you take me to the comments section of {{post}} {{condition}}?", + "instantiation_dict": { + "post": "the hottest post (of all time) in the art forum", + "condition": "where the post is only in black and white" + }, + "intent": "Can you take me to the comments section of the hottest post (of all time) in the art forum where the post is only in black and white?", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/Art/30842/moods-of-maizie-by-me-pen-2022", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 64 + }, + { + "sites": [ + "reddit" + ], + "task_id": 197, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you take me to the comments section of {{post}} {{condition}}?", + "instantiation_dict": { + "post": "the most commented post (of all time) in f/food", + "condition": "where the food is vegetarian" + }, + "intent": "Can you take me to the comments section of the most commented post (of all time) in f/food where the food is vegetarian?", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/food/18823/i-ate-maple-pecan-croissant", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 64 + }, + { + "sites": [ + "reddit" + ], + "task_id": 198, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + 
"intent_template": "Can you comment '{{comment}}' on the top post (of all time) in {{forum}} that relates to the image?", + "instantiation_dict": { + "comment": "\ud80c\udc20\ud80c\udc20", + "forum": "f/history" + }, + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/M\u00e5rten_Eskil_Winge_-_Tor's_Fight_with_the_Giants_-_Google_Art_Project.jpg.webp", + "intent": "Can you comment '\ud80c\udc20\ud80c\udc20' on the top post (of all time) in f/history that relates to the image?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/history/20281/thor-s-hammer-amulet-discovered-in-sweden-arkeonews", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "\ud80c\udc20\ud80c\udc20" + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "", + "intent_template_id": 82 + }, + { + "sites": [ + "reddit" + ], + "task_id": 199, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Can you comment '{{comment}}' on the top post (of all time) in {{forum}} that relates to the image?", + "instantiation_dict": { + "comment": "I remember seeing him in person, one of the best nights of my life.", + "forum": "f/music" + }, + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Michael_Jackson_first_moonwalk_1983.gif", + "intent": "Can you comment 'I remember seeing him in person, one of the best nights of my life.' 
on the top post (of all time) in f/music that relates to the image?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/Music/35453/michael-jackson-s-thriller-returns-to-billboard-top-10-after", + "locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')", + "required_contents": { + "exact_match": "I remember seeing him in person, one of the best nights of my life." + } + } + ] + }, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 82 + }, + { + "sites": [ + "reddit" + ], + "task_id": 200, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/explainlikeimfive", + "geolocation": null, + "intent_template": "Can you take me to the post on this page that is most related to the picture on this page?", + "instantiation_dict": {}, + "image": "https://images.pexels.com/photos/8599635/pexels-photo-8599635.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "intent": "Can you take me to the post on this page that is most related to the picture on this page?", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/explainlikeimfive/125106/eli5-why-are-pickles-not-just-called-pickled-cucumbers", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 83 + }, + { + "sites": [ + "reddit" + ], + "task_id": 201, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/explainlikeimfive", + "geolocation": null, + "intent_template": "Can you take me to the post on this page that is most related to the picture on this page?", + 
"instantiation_dict": {}, + "image": "https://images.pexels.com/photos/4045606/pexels-photo-4045606.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "intent": "Can you take me to the post on this page that is most related to the picture on this page?", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/explainlikeimfive/60436/eli5-why-does-putting-one-foot-out-from-under-the-blankets", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 83 + }, + { + "sites": [ + "reddit" + ], + "task_id": 202, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/Art?next%5Branking%5D=10246&next%5Bid%5D=51658", + "geolocation": null, + "intent_template": "Can you message the users who posted {{condition}} on this page with '{{message}}'?", + "instantiation_dict": { + "condition": "art with an animal (including stuffed animals)", + "message": "Animals are so cute, and I really like your work!" + }, + "intent": "Can you message the users who posted art with an animal (including stuffed animals) on this page with 'Animals are so cute, and I really like your work!'?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/messages", + "locator": "func:get_query_text(__page__, \"#main > table > tbody\")", + "required_contents": { + "must_include": [ + "iwouldtradetheworld", + "10seas", + "cryptowallet77", + "sarcasticsnowleopard", + "kookaburra_kool", + "Psycho_Ravager", + "houselegs", + "sasquatchinheat", + "Horsenart95", + "ika_simpson", + "VedaDragon", + "Animals are so cute, and I really like your work!" 
+ ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "Needs to message lots of different people and some of the animals are quite hard to spot.", + "intent_template_id": 84 + }, + { + "sites": [ + "reddit" + ], + "task_id": 203, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/gaming?next%5Branking%5D=712&next%5Bid%5D=83741", + "geolocation": null, + "intent_template": "Can you message the users who posted {{condition}} on this page with '{{message}}'?", + "instantiation_dict": { + "condition": "pictures related to Nintendo games", + "message": "Mama mia!!" + }, + "intent": "Can you message the users who posted pictures related to Nintendo games on this page with 'Mama mia!!'?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/messages", + "locator": "func:get_query_text(__page__, \"#main > table > tbody\")", + "required_contents": { + "must_include": [ + "evolve_into_wizards", + "afieldbleed47", + "devilmaycry10092", + "peen_was", + "Ghimzzo", + "ENateFak", + "dorgobar", + "Mama mia!!" 
+ ] + } + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "Did not check for three ambiguous posts __REDDIT__/f/gaming/61510/i-was-cleaning-my-basement-and-found-some-games-i-saved-from and __REDDIT__/f/gaming/105197/updated-game-room and __REDDIT__/f/gaming/126167/welp", + "intent_template_id": 84 + }, + { + "sites": [ + "reddit" + ], + "task_id": 204, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/space", + "geolocation": null, + "intent_template": "Can you take me to the comments section of {{post}} {{condition}}?", + "instantiation_dict": { + "post": "the last post on this page", + "condition": "that has a picture of a person" + }, + "intent": "Can you take me to the comments section of the last post on this page that has a picture of a person?", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/space/134033/3-years-ago-i-built-this-telescope-in-my-parents-garage-it-s", + "program_html": [], + "url_note": "EXACT" + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 64 + }, + { + "sites": [ + "reddit" + ], + "task_id": 205, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/space?next%5Branking%5D=1321&next%5Bid%5D=27417", + "geolocation": null, + "intent_template": "What is the top comment on the post about {{condition}}?", + "instantiation_dict": { + "condition": "the company founded by the guy in the image" + }, + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/2019-06-11-tesla-shareholder-meeting-elon-musk.jpg.webp", + "intent": "What is the top comment on the post about the company founded by the guy in the image?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + 
"reference_answers": { + "must_include": [ + "Sounds to me like Amazon has too many fingers in too many pies." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sounds to me like Amazon has too many fingers in too many pies." + }, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "Guy in the picture is Elon Musk and he founded SpaceX.", + "intent_template_id": 85 + }, + { + "sites": [ + "reddit" + ], + "task_id": 206, + "require_login": false, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/EarthPorn/most_commented?t=all&next%5BcommentCount%5D=89&next%5Bid%5D=76289", + "geolocation": null, + "intent_template": "What is the top comment on the post about {{condition}}?", + "instantiation_dict": { + "condition": "the thing that looks like the image" + }, + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Crystal_skull_british_museum_random9834672.jpg.webp", + "intent": "What is the top comment on the post about the thing that looks like the image?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Why not Zoidberg?" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Why not Zoidberg?" 
+ }, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "", + "intent_template_id": 85 + }, + { + "sites": [ + "reddit", + "shopping" + ], + "task_id": 207, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__ |AND| __SHOPPING__/sony-wh-1000xm4-wireless-noise-canceling-over-ear-headphones-black-with-sony-wla-ns7-wireless-tv-adapter-bundle-2-items.html |AND| __SHOPPING__/bang-olufsen-beoplay-hx-comfortable-wireless-anc-over-ear-headphones-timber.html", + "geolocation": null, + "intent_template": "Can you first make a post in {{forum1}} (then afterwards in {{forum2}}) asking for a comparison between the two {{item}} in the shopping tabs? Make sure to include both their SKUs in the title and a picture of the {{condition}}.", + "instantiation_dict": { + "forum1": "f/headphones", + "forum2": "f/gaming", + "item": "headphones", + "condition": "darker one" + }, + "intent": "Can you first make a post in f/headphones (then afterwards in f/gaming) asking for a comparison between the two headphones in the shopping tabs? 
Make sure to include both their SKUs in the title and a picture of the darker one.", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "func:get_query_text(__page__, \"#main > article:nth-child(4)\")", + "required_contents": { + "must_include": [ + "headphones", + "B09M79MQYD", + "B08YJ3W1HB" + ] + } + }, + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "func:get_query_text(__page__, \"#main > article:nth-child(3)\")", + "required_contents": { + "must_include": [ + "gaming", + "B09M79MQYD", + "B08YJ3W1HB" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "__REDDIT__/user/MarvelsGrantMan136", + "eval_image_class": ".submission__row", + "eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09M79MQYD.0.jpg |OR| __SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09M79MQYD.1.jpg", + "ssim_threshold": 0.6 + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 86 + }, + { + "sites": [ + "reddit", + "shopping" + ], + "task_id": 208, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__ |AND| __SHOPPING__/rca-43-4k-ultra-hd-2160p-hdr-roku-smart-led-tv-rtru4327-us.html |AND| __SHOPPING__/pioneer-43-inch-class-led-4k-uhd-smart-fire-tv-pn43951-22u-2021-model.html", + "geolocation": null, + "intent_template": "Can you first make a post in {{forum1}} (then afterwards in {{forum2}}) asking for a comparison between the two {{item}} in the shopping tabs? 
Make sure to include both their SKUs in the title and a picture of the {{condition}}.", + "instantiation_dict": { + "forum1": "f/monitor", + "forum2": "f/gadgets", + "item": "monitors", + "condition": "monitor with the movie titles" + }, + "intent": "Can you first make a post in f/monitor (then afterwards in f/gadgets) asking for a comparison between the two monitors in the shopping tabs? Make sure to include both their SKUs in the title and a picture of the monitor with the movie titles.", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "func:get_query_text(__page__, \"#main > article:nth-child(4)\")", + "required_contents": { + "must_include": [ + "monitor", + "B09F3TW5CP", + "B09SBS75XB" + ] + } + }, + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "func:get_query_text(__page__, \"#main > article:nth-child(3)\")", + "required_contents": { + "must_include": [ + "gadgets", + "B09F3TW5CP", + "B09SBS75XB" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "__REDDIT__/user/MarvelsGrantMan136", + "eval_image_class": ".submission__row", + "eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09F3TW5CP.0.jpg |OR| __SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09F3TW5CP.1.jpg", + "ssim_threshold": 0.6 + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 86 + }, + { + "sites": [ + "reddit", + "shopping" + ], + "task_id": 209, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__ |AND| __SHOPPING__/ultra-game-nfl-men-s-full-zip-soft-fleece-hoodie-letterman-varsity-jacket.html |AND| 
__SHOPPING__/ultra-game-nfl-men-s-active-long-sleeve-tee-shirt.html", + "geolocation": null, + "intent_template": "Can you first make a post in {{forum1}} (then afterwards in {{forum2}}) asking for a comparison between the two {{item}} in the shopping tabs? Make sure to include both their SKUs in the title and a picture of the {{condition}}.", + "instantiation_dict": { + "forum1": "f/pittsburgh", + "forum2": "f/Pennsylvania", + "item": "products", + "condition": "front of the hoodie" + }, + "intent": "Can you first make a post in f/pittsburgh (then afterwards in f/Pennsylvania) asking for a comparison between the two products in the shopping tabs? Make sure to include both their SKUs in the title and a picture of the front of the hoodie.", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "func:get_query_text(__page__, \"#main > article:nth-child(4)\")", + "required_contents": { + "must_include": [ + "pittsburgh", + "B01MQK1BQP", + "B07BCKK3HS" + ] + } + }, + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "func:get_query_text(__page__, \"#main > article:nth-child(3)\")", + "required_contents": { + "must_include": [ + "Pennsylvania", + "B01MQK1BQP", + "B07BCKK3HS" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "__REDDIT__/user/MarvelsGrantMan136", + "eval_image_class": ".submission__row", + "eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B01MQK1BQP.0.jpg", + "ssim_threshold": 0.6 + } + ] + }, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "", + "intent_template_id": 86 + } +] \ No newline at end of file diff --git a/VAB-WebArena-Lite/config_files/vwa/test_shopping.raw.json b/VAB-WebArena-Lite/config_files/vwa/test_shopping.raw.json 
new file mode 100644 index 0000000..fdc8f95 --- /dev/null +++ b/VAB-WebArena-Lite/config_files/vwa/test_shopping.raw.json @@ -0,0 +1,18641 @@ +[ + { + "sites": [ + "shopping" + ], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the least expensive {{product}} from \"{{category}}\" category.", + "intent": "Buy the least expensive red blanket (in any size) from \"Blankets & Throws\" category.", + "instantiation_dict": { + "category": "Blankets & Throws", + "product": "red blanket (in any size)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B0983XCYK6", + "Red" + ] + } + } + ] + }, + "intent_template_id": 0, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 1, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the least expensive {{product}} from \"{{category}}\" category.", + "intent": "Buy the least expensive blue headset from \"Virtual Reality (VR) Headsets\" category.", + "instantiation_dict": { + "category": "Virtual Reality (VR) Headsets", + "product": "blue headset" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B09MP3SGPN" + ] + } + } + ] + }, + 
"intent_template_id": 0, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 2, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the least expensive {{product}} from \"{{category}}\" category.", + "intent": "Buy the least expensive canvas print with grapes from \"Posters & Prints\" category.", + "instantiation_dict": { + "category": "Posters & Prints", + "product": "canvas print with grapes" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B08ZCCRMH4" + ] + } + } + ] + }, + "intent_template_id": 0, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 3, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive {{product}} from \"{{product_category}}\" category", + "intent": "I am doing a market survey for one stop market, show me the most expensive red controller from \"PS4 accessories\" category", + "instantiation_dict": { + "product": "red controller", + "product_category": "PS4 accessories" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/dualshock-4-wireless-controller-for-playstation-4-magma-red.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 1, + 
"reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 4, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive {{product}} from \"{{product_category}}\" category", + "intent": "I am doing a market survey for one stop market, show me the most expensive black and white item from \"Posters & Prints\" category", + "instantiation_dict": { + "product": "black and white item", + "product_category": "Posters & Prints" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/signature-design-by-ashley-pancho-modern-framed-cow-canvas-wall-art-48-x-36-black-white.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 1, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 5, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive {{product}} from \"{{product_category}}\" category", + "intent": "I am doing a market survey for one stop market, show me the most expensive yellow product from \"Dried Fruits & Vegetables\" category", + "instantiation_dict": { + "product": "yellow product", + "product_category": "Dried Fruits & Vegetables" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/sun-maid-california-golden-raisins-15-ounce-boxes-pack-of-24.html", + "program_html": [], + "url_note": "EXACT" + }, + 
"intent_template_id": 1, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 6, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "{{modifier}}List the full product names of {{color}} {{product}} and tell me the price range of the available products", + "intent": "Search for 'Nike slide slippers'. List the full product names of red ones on the first page and tell me the price range of the available products", + "instantiation_dict": { + "modifier": "Search for 'Nike slide slippers'. ", + "color": "red", + "product": "ones on the first page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Nike Men's Air Max Camden Slide Sandal", + "Nike Men's Benassi JDI Fanny Pack Slides", + "Nike Offcourt Slide Mens Bq4639-002 Size 12", + "Nike Jordan Men's Break Slide Red AR6374-602", + "42.81", + "69.00" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These models are availiable: Nike Men's Air Max Camden Slide Sandal, Nike Men's Benassi JDI Fanny Pack Slides, Nike Offcourt Slide Mens Bq4639-002 Size 12, Nike Jordan Men's Break Slide Red AR6374-602. The price ranges from $42.81 to $69.00" + }, + "intent_template_id": 2, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 7, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "{{modifier}}List the full product names of {{color}} {{product}} and tell me the price range of the available products", + "intent": "Search for 'pencil case'. 
List the full product names of those with animal designs on the first page and tell me the price range of the available products", + "instantiation_dict": { + "modifier": "Search for 'pencil case'. ", + "color": "those with", + "product": "animal designs on the first page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Cosmetic Bag Leather Pencil Case,Cosmetic Bag Leather Pencil Case,Two Wolves,Portable Small Makeup Bags Travel bag Multifunctional Pen Holder Box with Zipper for Women Girls Lady Teens", + "Danielle Cute Boston Terrier, Frenchie, French Bulldog Portable Pencil Case, Cosmetic Travel Organizer Bag | 9x3x3 in", + "Dinosaur Pattern Travel Bag Large Toiletries Bag Cosmetic Pouch Pencil Case Tote Multifunction Organizer Storage Bag", + "Leopard Paw Cosmetic Bag Pink Makeup Bag for Girls Women Portable Travel Pencil Case Water-resistant Organizer Birthday Christmas Gift", + "5.99", + "38.67" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 2, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 8, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=+Lays", + "geolocation": null, + "intent_template": "{{modifier}}List the full product names of {{color}} {{product}} and tell me the price range of the available products", + "intent": "List the full product names of red packaging Lays chips on this page and tell me the price range of the available products", + "instantiation_dict": { + "modifier": "", + "color": "red packaging", + "product": "Lays chips on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + 
"Canadian Lays Ketchup Chips - 1 Family Size Bag", + "Canadian Lays Ketchup Potato Chips (4pk) 180g/6.3 oz., {Imported from Canada}", + "LAYS Brand , Crispy Potato Chips Smoked Salmon Cream Cheese Flavour, 67g X 2 Packs", + "11.85", + "38.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These are availiable: Canadian Lays Ketchup Chips - 1 Family Size Bag, Canadian Lays Ketchup Potato Chips (4pk) 180g/6.3 oz., {Imported from Canada}, LAYS Brand , Crispy Potato Chips Smoked Salmon Cream Cheese Flavour, 67g X 2 Packs. The price ranges from $11.85 to $38.99" + }, + "intent_template_id": 2, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 9, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Show me the {{order}} in the \"{{product}}\" search results by {{sorting_order}}.", + "instantiation_dict": { + "order": "first item in the second row", + "product": "chairs", + "sorting_order": "descending price" + }, + "intent": "Show me the first item in the second row in the \"chairs\" search results by descending price.", + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/oeukji-office-chairs-office-products-boss-chair-business-home-office-chair-reclining-computer-chair-leather-boss-chair-swivel-chair-executive-chair.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 3, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 10, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/", + "geolocation": null, + "intent_template": "Show me the {{order}} in the \"{{product}}\" search results by {{sorting_order}}.", + "instantiation_dict": { + "order": "third item in the second row", + "product": "frozen pizza", + "sorting_order": "descending price" + }, + "intent": "Show me the third item in the second row in the \"frozen pizza\" search results by descending price.", + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/portobello-arancini-bites-gourmet-frozen-appetizers-set-of-8-trays.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 3, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 11, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Show me the {{order}} in the \"{{product}}\" search results by {{sorting_order}}.", + "instantiation_dict": { + "order": "first item with round cookies", + "product": "ice cream sandwiches", + "sorting_order": "descending relevance" + }, + "intent": "Show me the first item with round cookies in the \"ice cream sandwiches\" search results by descending relevance.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/skinny-cow-gone-wild-low-fat-ice-cream-sandwiches-box-vanilla-24-fl-oz-pack-of-6.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 3, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 12, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/", + "geolocation": null, + "intent_template": "Show me the {{order}} in the \"{{product}}\" search results by {{sorting_order}}.", + "instantiation_dict": { + "order": "first painting", + "product": "van gogh", + "sorting_order": "descending relevance" + }, + "intent": "Show me the first painting in the \"van gogh\" search results by descending relevance.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/decorarts-cafe-terrace-at-night-vincent-van-gogh-art-reproduction-giclee-canvas-prints-wall-art-for-home-decor-30x24.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 3, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 13, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "What is the price range for products in the {{region}}?", + "instantiation_dict": { + "region": "first row of this page" + }, + "intent": "What is the price range for products in the first row of this page?", + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "14.47", + "23.50" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$14.47 - $23.50" + }, + "intent_template_id": 4, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 14, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/nintendo-switch.html", + "geolocation": null, + "intent_template": "What is the price range for products in the 
{{region}}?", + "instantiation_dict": { + "region": "last row of this page" + }, + "intent": "What is the price range for products in the last row of this page?", + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "4.95", + "43.00" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$4.95 - $43.00" + }, + "intent_template_id": 4, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 15, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/sport-specific-clothing/competitive-swimwear.html?product_list_limit=36", + "geolocation": null, + "intent_template": "What is the price range for products in the {{region}}?", + "instantiation_dict": { + "region": "second and third rows of this page" + }, + "intent": "What is the price range for products in the second and third rows of this page?", + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "4.99", + "16.09" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$4.99 - $16.09" + }, + "intent_template_id": 4, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 16, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/office-products/office-electronics/printers-accessories.html?p=3", + "geolocation": null, + "intent_template": "What is the price range for products in the {{region}}?", + "instantiation_dict": { + "region": "first 
column of this page" + }, + "intent": "What is the price range for products in the first column of this page?", + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "47.69", + "488.83" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$47.69 - $488.83" + }, + "intent_template_id": 4, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 17, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/electronics/video-projectors.html", + "geolocation": null, + "intent_template": "What is the price range for products in the {{region}}?", + "instantiation_dict": { + "region": "last column of this page" + }, + "intent": "What is the price range for products in the last column of this page?", + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "123.41", + "184.59" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$123.41 - $184.59" + }, + "intent_template_id": 4, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 18, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "What is the price of the most expensive {{color}} product in the \"{{product_category}}\" category?", + "instantiation_dict": { + "color": "red", + "product_category": "Basic Cases" + }, + "intent": "What is the price of the most expensive red product in the \"Basic Cases\" category?", + 
"require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "42.98" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$42.98" + }, + "intent_template_id": 5, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 19, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "What is the price of the most expensive {{color}} product in the \"{{product_category}}\" category?", + "instantiation_dict": { + "color": "feather lamp", + "product_category": "Lamps & Shades" + }, + "intent": "What is the price of the most expensive feather lamp product in the \"Lamps & Shades\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "920.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$920.99" + }, + "intent_template_id": 5, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 20, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "What is the price of the most expensive {{color}} product in the \"{{product_category}}\" category?", + "instantiation_dict": { + "color": "blue", + "product_category": "Posters & Prints" + }, + "intent": "What is the price of the most expensive blue product in the \"Posters & Prints\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "399.99" + ] + }, + "reference_url": 
"", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$399.99" + }, + "intent_template_id": 5, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 21, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "What is the color of the {{type}} item in the \"{{product_category}}\" category?", + "instantiation_dict": { + "type": "most expensive", + "product_category": "Over-Ear Headphones" + }, + "intent": "What is the color of the most expensive item in the \"Over-Ear Headphones\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "black" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "black" + }, + "intent_template_id": 6, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 22, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "What is the color of the {{type}} item in the \"{{product_category}}\" category?", + "instantiation_dict": { + "type": "second cheapest", + "product_category": "Snack Gifts" + }, + "intent": "What is the color of the second cheapest item in the \"Snack Gifts\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "red |OR| yellow" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "red and yellow" + }, + "intent_template_id": 6, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + 
"comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 23, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "What is the color of the {{type}} item in the \"{{product_category}}\" category?", + "instantiation_dict": { + "type": "third most expensive", + "product_category": "Exercise & Fitness" + }, + "intent": "What is the color of the third most expensive item in the \"Exercise & Fitness\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "red" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "red" + }, + "intent_template_id": 6, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 24, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\" and show me the {{item_order}} item {{condition}}", + "instantiation_dict": { + "keyword": "usb wifi", + "item_order": "first", + "condition": "in the second row" + }, + "intent": "Search for \"usb wifi\" and show me the first item in the second row", + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/edup-usb-wifi-adapter-dual-band-wireless-network-adapter-802-11-ac-2-4g-5g-usb-wi-fi-dongle-with-extender-antenna-compatible-with-windows-xp-vista-7-8-1-10-mac-os-x-10-7-10-15.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 7, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, 
+ { + "sites": [ + "shopping" + ], + "task_id": 25, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\" and show me the {{item_order}} item {{condition}}", + "instantiation_dict": { + "keyword": "raincoat", + "item_order": "first", + "condition": "that is blue in color" + }, + "intent": "Search for \"raincoat\" and show me the first item that is blue in color", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/wuai-women-oversized-raincoats-rain-jackets-plus-size-lightweight-waterproof-trench-coat-outdoor-packable-windbreaker-s-5xl.html |OR| __SHOPPING__/jspoyou-mens-windproof-rain-jacket-detachable-hooded-lightweight-breathable-trench-coat-winter-warm-packable-raincoat-outwear.html |OR| __SHOPPING__/lightweight-hooded-rain-coat-womens-waterproof-zipper-jacket-windproof-waterproof-raincoat-jacket-light-comfy-outdoor.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 7, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 26, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\" and show me the {{item_order}} item {{condition}}", + "instantiation_dict": { + "keyword": "plush toy", + "item_order": "first", + "condition": "that is pink in color" + }, + "intent": "Search for \"plush toy\" and show me the first item that is pink in color", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/olpchee-foldable-plush-children-s-sofa-backrest-chair-cute-cartoon-infant-baby-seat-for-living-room-bedroom-corgi.html", + 
"program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 7, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 27, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Show me the first {{design}} in the search results for \"{{product}}\".", + "instantiation_dict": { + "design": "white table", + "product": "table" + }, + "intent": "Show me the first white table in the search results for \"table\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/n-a-yyhen-beautiful-three-legged-wooden-coffee-tableiving-room-balcony-simple-end-table-white.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 8, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 28, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Show me the first {{design}} in the search results for \"{{product}}\".", + "instantiation_dict": { + "design": "rainbow colored coat", + "product": "winter coat" + }, + "intent": "Show me the first rainbow colored coat in the search results for \"winter coat\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/winter-coat-men-fashion-casual-buckle-lapels-printed-single-breasted-suit-jacket.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 8, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + 
"task_id": 29, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Show me the first {{design}} in the search results for \"{{product}}\".", + "instantiation_dict": { + "design": "red coat", + "product": "winter coat" + }, + "intent": "Show me the first red coat in the search results for \"winter coat\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/smooto-winter-coats-women-s-warm-hooded-thick-padded-outerwear-jackets-overcoat.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 8, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 30, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Show me the first {{design}} in the search results for \"{{product}}\".", + "instantiation_dict": { + "design": "coat with a furry hood", + "product": "winter coat" + }, + "intent": "Show me the first coat with a furry hood in the search results for \"winter coat\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/charmap-winter-coats-for-women-warm-coat-jacket-outwear-faux-fur-lined-trench-winter-hooded-thick-overcoat.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 8, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 31, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category listed 
by ascending price and add the first {{design}} item to my shopping cart.", + "instantiation_dict": { + "category": "Headphones", + "design": "blue" + }, + "intent": "Explore the \"Headphones\" category listed by ascending price and add the first blue item to my shopping cart.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Audio-Technica ATH-CLR100iSBL SonicFuel In-Ear Headphones with In-Line Microphone & Control, Blue" + ] + } + } + ] + }, + "intent_template_id": 9, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 32, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category listed by ascending price and add the first {{design}} item to my shopping cart.", + "instantiation_dict": { + "category": "Makeup Palettes", + "design": "blue palette" + }, + "intent": "Explore the \"Makeup Palettes\" category listed by ascending price and add the first blue palette item to my shopping cart.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Cosmetic Matte Eyeshadow Cream Christmas Makeup Palette-Shimmer Set, 9 Colors Waterproof Eyeshadow Palettes Contour Palettes Powder Shimmer (A)" + ] + } + } + ] + }, + "intent_template_id": 9, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 33, + 
"require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category listed by ascending price and tell me the color of the {{order}} item in the first row.", + "instantiation_dict": { + "category": "Competitive Swimwear", + "order": "second" + }, + "intent": "Explore the \"Competitive Swimwear\" category listed by ascending price and tell me the color of the second item in the first row.", + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "pink" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "pink" + }, + "intent_template_id": 10, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 34, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category listed by ascending price and tell me the colors of the {{order}} item in the first row.", + "instantiation_dict": { + "category": "Artificial Plants & Flowers", + "order": "first" + }, + "intent": "Explore the \"Artificial Plants & Flowers\" category listed by ascending price and tell me the colors of the first item in the first row.", + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "red" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "red and green" + }, + "intent_template_id": 11, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, 
+ { + "sites": [ + "shopping" + ], + "task_id": 35, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Explore the \"{{category}}\" category listed by ascending price and tell me the color of the {{order}} item in the first row.", + "instantiation_dict": { + "category": "Patio Furniture & Accessories", + "order": "fourth" + }, + "intent": "Explore the \"Patio Furniture & Accessories\" category listed by ascending price and tell me the color of the fourth item in the first row.", + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "one_of": [ + "yellow", + "gold" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "yellow" + }, + "intent_template_id": 10, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 36, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/dairy-cheese-eggs/cheese.html", + "geolocation": null, + "intent_template": "Add the {{product}} in the {{location}} to my wish list.", + "instantiation_dict": { + "product": "round cheese", + "location": "first row of this page" + }, + "intent": "Add the round cheese in the first row of this page to my wish list.", + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Queijo Minas Mineiro Curado Brazilian Minas Cheese Gourmet Aged Cheese" + ] + } + } + ] + }, + 
"intent_template_id": 12, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 37, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/bedding/blankets-throws.html", + "geolocation": null, + "intent_template": "Add the {{product}} to my wish list.", + "instantiation_dict": { + "product": "navy blue one in the second column" + }, + "intent": "Add the navy blue one in the second column to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "PEACE NEST Lightweight Down and Feather Fiber Throw Blanket Soft Couch Throw for Indoor and Outdoor Use, 50\"x70\", Navy Blue" + ] + } + } + ] + }, + "intent_template_id": 13, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 38, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/women/uniforms-work-safety.html", + "geolocation": null, + "intent_template": "Add the {{product}} to my wish list.", + "instantiation_dict": { + "product": "pink ones on this page" + }, + "intent": "Add the pink ones on this page to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Wukong Direct Pants Practice Pants Cotton 
Pants Comfortable Breathable Bloomers Yoga" + ] + } + } + ] + }, + "intent_template_id": 13, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 39, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/oral-care/orthodontic-supplies.html", + "geolocation": null, + "intent_template": "Add the {{product}} to my wish list.", + "instantiation_dict": { + "product": "colorful thing on this page" + }, + "intent": "Add the colorful thing on this page to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "50Pcs/Bag Disposable Plastic Elastic Placers Dental Elastic Rubber Band Dental Orthodontic Product Multi Color" + ] + } + } + ] + }, + "intent_template_id": 13, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 40, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "What is the {{attribute}} and price of the cheapest item in the \"{{category}}\" category?", + "instantiation_dict": { + "attribute": "shape", + "category": "Chocolate" + }, + "intent": "What is the shape and price of the cheapest item in the \"Chocolate\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "one_of": [ + "circular", + "circle", + "sphere", + "round" + ], + "must_include": [ + "1.04" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": 
"", + "reference_answer_raw_annotation": "circle, $1.04" + }, + "intent_template_id": 14, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 41, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "What is the {{attribute}} and price of the cheapest item in the \"{{category}}\" category?", + "instantiation_dict": { + "attribute": "color", + "category": "Men's Uniforms, Work & Safety" + }, + "intent": "What is the color and price of the cheapest item in the \"Men's Uniforms, Work & Safety\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "white", + "0.19" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "white, $0.19" + }, + "intent_template_id": 14, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 42, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "What is the {{attribute}} and price of the cheapest item in the \"{{category}}\" category?", + "instantiation_dict": { + "attribute": "color", + "category": "Decorative Pillows, Inserts & Covers" + }, + "intent": "What is the color and price of the cheapest item in the \"Decorative Pillows, Inserts & Covers\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "black", + "1.93" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "black, $1.93" + }, + "intent_template_id": 14, + 
"reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 43, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Find me the cheapest item in the \"{{category}}\" category that comes in {{shape}}.", + "instantiation_dict": { + "category": "Soups, Stocks & Broths", + "shape": "rectangular packaging" + }, + "intent": "Find me the cheapest item in the \"Soups, Stocks & Broths\" category that comes in rectangular packaging.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/ocean-s-halo-organic-and-vegan-no-chicken-broth-32-oz-per-unit-2-pack.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 15, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 44, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Find me the cheapest item in the \"{{category}}\" category that comes in {{shape}}.", + "instantiation_dict": { + "category": "Posters & Prints", + "shape": "blue" + }, + "intent": "Find me the cheapest item in the \"Posters & Prints\" category that comes in blue.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/palacelearning-the-starry-night-1889-by-vincent-van-gogh-fine-art-poster-wall-art-print-laminated-18-x-24.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 15, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + 
"shopping" + ], + "task_id": 45, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Find me the cheapest item in the \"{{category}}\" category that comes in {{shape}}.", + "instantiation_dict": { + "category": "Children's Dental Care", + "shape": "the style of a cupcake" + }, + "intent": "Find me the cheapest item in the \"Children's Dental Care\" category that comes in the style of a cupcake.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/children-s-toothbrush-u-shaped-age-2-12-kids-silicone-toothbrush-food-soft-silicone-brush-head-with-cute-cake-look.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 15, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 46, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/kitchen-dining/kitchen-table-linens.html", + "geolocation": null, + "intent_template": "Add the {{attribute}} one in the {{order}} to my shopping cart.", + "instantiation_dict": { + "attribute": "red", + "order": "second row of this page" + }, + "intent": "Add the red one in the second row of this page to my shopping cart.", + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "ARTSHOWING Anchor Table Runner and Placemats Set of 6, Burlap Linen Table Runners 13x90inch, Heat-Insulating Placemats for Table Decor, Love The Life You Live" + ] + } + } + ] + }, + "intent_template_id": 16, + "reasoning_difficulty": "easy", 
+ "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 47, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/home-decor-products/artificial-plants-flowers.html", + "geolocation": null, + "intent_template": "Add the {{attribute}} one in the {{order}} to my shopping cart.", + "instantiation_dict": { + "attribute": "yellow", + "order": "first row of this page" + }, + "intent": "Add the yellow one in the first row of this page to my shopping cart.", + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Csyidio 6 Bundles Artificial Daisy Flowers Plastic Flowers Outdoor UV Resistant Plants Shrubs for Garden Porch Hanging Planter Window Box Decor(Yellow)" + ] + } + } + ] + }, + "intent_template_id": 16, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 48, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/office-products/office-furniture-lighting/chairs-sofas.html", + "geolocation": null, + "intent_template": "Add the {{attribute}} one in the {{order}} to my shopping cart.", + "instantiation_dict": { + "attribute": "white", + "order": "bottom row of this page" + }, + "intent": "Add the white one in the bottom row of this page to my shopping cart.", + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { 
+ "must_include": [ + "YAMASORO Ergonomic Executive Office Chair White High Back Leather Computer Chair,Office Desk Chair with Arms and Wheels" + ] + } + } + ] + }, + "intent_template_id": 16, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 49, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/men/clothing.html", + "geolocation": null, + "intent_template": "Add the {{product}} to my wish list.", + "intent": "Add the green checkered polo shirt (in any size) on this page to my wish list.", + "instantiation_dict": { + "product": "green checkered polo shirt (in any size) on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Men & Boy Long Sleeve Flannel Plaid Casual Shirts Button Down Regular Fit Long Sleeve Cotton Polo Shirt L-5XL" + ] + } + } + ] + }, + "intent_template_id": 13, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 50, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/women/clothing.html?p=2&product_list_limit=36", + "geolocation": null, + "intent_template": "Add the {{product}} to my wish list.", + "intent": "Add the neon green shirt to my wish list.", + "instantiation_dict": { + "product": "neon green shirt" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Short Sleeve Dry-Fit Workout Shirt - Loppet, 2-Pack" + ] + } + } + ] + }, + "intent_template_id": 13, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 51, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/furniture/home-office-furniture.html?p=4&product_list_limit=36", + "geolocation": null, + "intent_template": "Add the {{product}} {{condition}} to my wish list.", + "intent": "Add the pink bookcase that looks like a pig to my wish list.", + "instantiation_dict": { + "product": "pink bookcase", + "condition": "that looks like a pig" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Home Bookshelf Bookcase Shelf Bookshelf Book Shelves Simple Student Desktop Small Picture Book Storage Household Simple Floor Living Room Children's Bookcase Durable Bookcase Display Shelf Storage Org" + ] + } + } + ] + }, + "intent_template_id": 17, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 52, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/patio-lawn-garden/gardening-lawn-care/pots-planters-container-accessories.html", + "geolocation": null, + "intent_template": "Add the {{product}} {{condition}} to my wish list.", + "intent": "Add the one that looks like a wooden log to my wish list.", + 
"instantiation_dict": { + "product": "one", + "condition": "that looks like a wooden log" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "REAL Planters H-2 Horizontal Log Planter, Medium, Oak" + ] + } + } + ] + }, + "intent_template_id": 17, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 53, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sports-outdoors/fan-shop/footwear.html", + "geolocation": null, + "intent_template": "Add the {{product}} to my wish list.", + "intent": "Add the blue and orange one to my wish list.", + "instantiation_dict": { + "product": "blue and orange one" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "SKICKS Florida Gators Slip-On Sneaker - Adult Unisex" + ] + } + } + ] + }, + "intent_template_id": 13, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 54, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/wall-art/posters-prints.html", + "geolocation": null, + "intent_template": "Add the {{product}} {{condition}} to my wish list.", + "intent": "Add the one with waves to my wish list.", + "instantiation_dict": { + "product": "one", + 
"condition": "with waves" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "SiMiWOW Beach Lighthouse Wall Art Blue Ocean Coastal Painting Canvas Print Framed Artwork Bathroom Bedroom Living Room Decor 16\"x24\"" + ] + } + } + ] + }, + "intent_template_id": 17, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 55, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/wall-art/posters-prints.html?product_list_order=price", + "geolocation": null, + "intent_template": "Add the {{product}} in the {{location}} to my wish list.", + "intent": "Add the first one in the second row to my wish list.", + "instantiation_dict": { + "product": "first one", + "location": "second row" + }, + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "QTESPEII Framed Kitchen Pictures Wall Art Decor Green Grape and Red Wine Canvas Prints" + ] + } + } + ] + }, + "intent_template_id": 12, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 56, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/skin-care/sunscreens-tanning-products.html", + 
"geolocation": null, + "intent_template": "Add the {{product}} in the {{location}} to my wish list.", + "intent": "Add the orange one in the first row to my wish list.", + "instantiation_dict": { + "product": "orange one", + "location": "first row" + }, + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Banana Boat Ultra Sport Sunscreen Lotion, Broad Spectum SPF 30, 8 Fl Oz" + ] + } + } + ] + }, + "intent_template_id": 12, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 57, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/home-decor-products/artificial-plants-flowers.html", + "geolocation": null, + "intent_template": "Add the {{product}} {{condition}} to my wish list.", + "intent": "Add the one on this page with 'garden' written on it to my wish list.", + "instantiation_dict": { + "product": "one on this page", + "condition": "with 'garden' written on it" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Artificial Succulent Plant Pot Milk White Ceramic Flowerpot Wine Bottle can Shape Home Office Desktop Countertop Decoration Small Potted Plant (ws8003)" + ] + } + } + ] + }, + "intent_template_id": 17, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "comments": "", + 
"overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 58, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/furniture/game-recreation-room-furniture.html", + "geolocation": null, + "intent_template": "Add the {{product}} in the {{location}} to my wish list.", + "intent": "Add the brown one in the first row to my wish list.", + "instantiation_dict": { + "product": "brown one", + "location": "first row" + }, + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Set of 2 Bar Stools Pub Chair Fabric w/Wood Legs Backrest Home Furniture Brown HilariousM Stoolseating Office Chair Desk Chair Bar stools Folding Table Step Stool Folding Chair Patio Chair" + ] + } + } + ] + }, + "intent_template_id": 12, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 59, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/home-decor-products/artificial-plants-flowers.html", + "geolocation": null, + "intent_template": "Tell me the price of the {{product}} in the {{location}}.", + "intent": "Tell me the price of the yellow one in the first row.", + "instantiation_dict": { + "product": "yellow one", + "location": "first row" + }, + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "37.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": 
"$8.99" + }, + "intent_template_id": 18, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 60, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/health-household/health-care.html", + "geolocation": null, + "intent_template": "Tell me the price of the {{product}}.", + "intent": "Tell me the price of the purple one on this page.", + "instantiation_dict": { + "product": "purple one on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "17.47" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$17.47" + }, + "intent_template_id": 19, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 61, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/bedding/decorative-pillows-inserts-covers.html", + "geolocation": null, + "intent_template": "Tell me the price of the {{product}} in the {{location}}.", + "intent": "Tell me the price of the pink one in the first row.", + "instantiation_dict": { + "product": "pink one", + "location": "first row" + }, + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "26.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$26.99" + }, + "intent_template_id": 18, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 62, + "require_login": true, + "storage_state": 
"./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sports-outdoors/exercise-fitness.html", + "geolocation": null, + "intent_template": "Tell me the price of the {{product}} in the {{location}}.", + "intent": "Tell me the price of the yellow item in the first row.", + "instantiation_dict": { + "product": "yellow item", + "location": "first row" + }, + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "10.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$10.99" + }, + "intent_template_id": 18, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 63, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/breads-bakery/cookies.html", + "geolocation": null, + "intent_template": "Tell me the price of the {{product}} in the {{location}}.", + "intent": "Tell me the price of the purple one in the first row.", + "instantiation_dict": { + "product": "purple one", + "location": "first row" + }, + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "18.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$18.99" + }, + "intent_template_id": 18, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 64, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/breads-bakery/cookies.html", + "geolocation": null, + "intent_template": "Tell me the price of the 
{{product}} in the {{location}}.", + "intent": "Tell me the price of the green one in the first row.", + "instantiation_dict": { + "product": "green one", + "location": "first row" + }, + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "25.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$25.99" + }, + "intent_template_id": 18, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 65, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/playstation-4/accessories.html", + "geolocation": null, + "intent_template": "Tell me the price of the {{product}} in the {{location}}.", + "intent": "Tell me the price of the blue one in the first row.", + "instantiation_dict": { + "product": "blue one", + "location": "first row" + }, + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$16.99" + }, + "intent_template_id": 18, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 66, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention {{description}} for the {{product}}", + "instantiation_dict": { + "description": "being disappointed", + "product": "first red and yellow product" + }, + "intent": "List out reviewers, if exist, who mention being 
disappointed for the first red and yellow product", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "John", + "Vini C", + "Amazon Customer", + "Katie Switalski", + "Catherine L." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "John, Vini C, Amazon Customer, Katie Switalski, Catherine L." + }, + "intent_template_id": 20, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 67, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/hair-care/hair-coloring-products.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention {{description}} for the {{product}}", + "instantiation_dict": { + "description": "'dark'", + "product": "second product in the first row" + }, + "viewport_size": { + "width": 1280 + }, + "intent": "List out reviewers, if exist, who mention 'dark' for the second product in the first row", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "jakeanielsen", + "Jessie Mora", + "Loyal AmazonianLoyal Amazonian" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "jakeanielsen, Jessie Mora, Loyal AmazonianLoyal Amazonian" + }, + "intent_template_id": 20, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 68, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention {{description}} for the {{product}}", + 
"instantiation_dict": { + "description": "'disgust'", + "product": "purple product" + }, + "intent": "List out reviewers, if exist, who mention 'disgust' for the purple product", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Maria A.", + "Amazon CustomerAmazon Customer" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Maria A., Amazon CustomerAmazon Customer" + }, + "intent_template_id": 20, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 69, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention {{description}} for the {{product}}", + "instantiation_dict": { + "description": "'hot'", + "product": "item in black packaging" + }, + "intent": "List out reviewers, if exist, who mention 'hot' for the item in black packaging", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Prodajr" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Prodajr" + }, + "intent_template_id": 20, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 70, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/bedding/decorative-pillows-inserts-covers.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention {{description}} for the {{product}}", + "instantiation_dict": { + "description": "'cute'", + "product": "pink thing" + }, + "intent": "List out 
reviewers, if exist, who mention 'cute' for the pink thing", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lou HS", + "Mrs. Renfro" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lou HS, Mrs. Renfro" + }, + "intent_template_id": 20, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 71, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/playstation-4/accessories.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention {{description}} for the {{product}}", + "instantiation_dict": { + "description": "malfunctioning", + "product": "item with the black and white graphic design" + }, + "intent": "List out reviewers, if exist, who mention malfunctioning for the item with the black and white graphic design", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no reviews on this item.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 20, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 72, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/mens-sweatpants-halloween-pumpkin-face-sport-pants-joggers-pants-soft-casual-pants.html", + "geolocation": null, + "intent_template": "Add this to my cart if {{condition}}", + "instantiation_dict": { + "condition": "it comes in orange" + }, + "intent": "Add this to my cart if it comes in orange", + "require_reset": false, + "eval": { + 
"eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Mens Sweatpants Halloween Pumpkin Face Sport Pants Joggers Pants Soft Casual Pants" + ] + } + } + ] + }, + "intent_template_id": 21, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 73, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/kangol-men-s-formal-sock-3-pack.html", + "geolocation": null, + "intent_template": "Add this to my cart if {{condition}}", + "instantiation_dict": { + "condition": "it comes in red" + }, + "intent": "Add this to my cart if it comes in red", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Kangol Men's Formal Sock, 3 Pack" + ] + } + } + ] + }, + "intent_template_id": 21, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 74, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/wieco-art-sunflower-by-vincent-van-gogh-oil-paintings-reproduction-modern-floral-giclee-canvas-prints-artwork-flowers-pictures-on-canvas-wall-art-for-home-and-office-decorations.html", + "geolocation": null, + "intent_template": "Add this to my cart if {{condition}}", + "instantiation_dict": { + "condition": "it is yellow" + }, + "intent": "Add this to my cart if it is yellow", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + 
"program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Wieco Art Sunflower by Vincent Van Gogh" + ] + } + } + ] + }, + "intent_template_id": 21, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 75, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/electronics/car-vehicle-electronics/car-electronics.html", + "geolocation": null, + "intent_template": "Add the {{product}} in the {{location}} to my wish list.", + "intent": "Add the first product in the second row to my wish list.", + "instantiation_dict": { + "product": "first product", + "location": "second row" + }, + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Amzparts 6.5\" MIB Car Radio Stereo 19D 035 280 Carplay Mirrorlink BT USB AUX For Volkswagen Bora T-ROC Sportsvan Touran L" + ] + } + } + ] + }, + "intent_template_id": 12, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 76, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/novelty-more.html", + "geolocation": null, + "intent_template": "Add the {{product}} in the {{location}} to my wish list.", + "intent": "Add the last product in the last row to my wish list.", + "instantiation_dict": { + "product": "last product", + "location": "last row" + }, + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + 
"eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "INTIMO Elf The Movie Men's Cotton Headed Ninny Muggins Allover Holiday Christmas Film Loungewear Pajama Pants" + ] + } + } + ] + }, + "intent_template_id": 12, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 77, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/patio-lawn-garden/patio-furniture-accessories.html", + "geolocation": null, + "intent_template": "Add the {{product}} in the {{location}} to my wish list.", + "intent": "Add the third product in the first row to my wish list.", + "instantiation_dict": { + "product": "third product", + "location": "first row" + }, + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Synthetic Rattan Woven Material, Plastic Rattan Furniture for Weaving and Repairing Tables and Chairs, Storage Basket, Ect (500 G)" + ] + } + } + ] + }, + "intent_template_id": 12, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 78, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/furniture/living-room-furniture.html", + "geolocation": null, + "intent_template": "Add the {{product}} in the 
{{location}} to my wish list.", + "intent": "Add the first product in the last row to my wish list.", + "instantiation_dict": { + "product": "first product", + "location": "last row" + }, + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "HIFIT End Table Set of 2, Narrow Side Table with Drawers, Flip Top End Side Table with Storage for Small Space, Bedside Table for Living Room Bedroom, Dark Brown" + ] + } + } + ] + }, + "intent_template_id": 12, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 79, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/electronics/video-projectors.html", + "geolocation": null, + "intent_template": "What is the price range for products in the {{region}}?", + "intent": "What is the price range for products in the second row?", + "instantiation_dict": { + "region": "second row" + }, + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "55.98", + "624.28" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$55.98 - $624.28" + }, + "intent_template_id": 4, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 80, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/cell-phones-accessories/cases-holsters-sleeves/basic-cases.html", + 
"geolocation": null, + "intent_template": "What is the price range for products in the {{region}}?", + "intent": "What is the price range for products in the last row?", + "instantiation_dict": { + "region": "last row" + }, + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "9.99", + "14.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$9.99 - $14.99" + }, + "intent_template_id": 4, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 81, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/health-household/health-care.html", + "geolocation": null, + "intent_template": "What is the price range for products in the {{region}}?", + "intent": "What is the price range for products in the first row?", + "instantiation_dict": { + "region": "first row" + }, + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "13.12", + "72.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$13.12 - $72.99" + }, + "intent_template_id": 4, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 82, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/alaza-cartoon-pineapple-stripe-house-fleece-slippers-comfy-bedroom-shoes-anti-slip-for-men-us-9-10.html |AND| __SHOPPING__/reef-men-s-twinpin-prints-sandals.html", + "geolocation": null, + "intent_template": "Add the product that {{attribute}} from my open tabs to 
the shopping cart", + "instantiation_dict": { + "attribute": "is darker in color" + }, + "intent": "Add the product that is darker in color from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Reef Men's Twinpin" + ] + } + } + ] + }, + "intent_template_id": 22, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 83, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/adidas-originals-mens-geodiver-primeblue-athletic-and-training-shoes-orange.html |AND| __SHOPPING__/marc-joseph-new-york-mens-lightweight-fashion-slip-on-sockless-sneakers-non-slip-breathable-comfortable-walking-athletic-sports-shoes.html", + "geolocation": null, + "intent_template": "Add the product that {{attribute}} from my open tabs to the shopping cart", + "instantiation_dict": { + "attribute": "is brighter in color" + }, + "intent": "Add the product that is brighter in color from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "adidas Originals Mens Geodiver Primeblue Athletic and Training Shoes Orange" + ] + } + } + ] + }, + "intent_template_id": 22, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 84, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/love-live-quote-art-wall-canvas-painting-dream-art-picture-black-white-print-and-poster-for-living-room-home-decor-70x100cm-3pcs-frameless.html |AND| __SHOPPING__/pittsburgh-pa-skyline-blue-by-michael-tompsett-16x24-inch-canvas-wall-art.html", + "geolocation": null, + "intent_template": "Add the product that {{attribute}} from my open tabs to the shopping cart", + "instantiation_dict": { + "attribute": "is most colorful" + }, + "intent": "Add the product that is most colorful from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Pittsburgh PA Skyline Blue" + ] + } + } + ] + }, + "intent_template_id": 22, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 85, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/office-book-stand-tree-shaped-bookshelf-on-the-table-simple-student-shelf-bookcase-storage-shelf-organizer.html |AND| __SHOPPING__/home-bookshelf-bookcase-shelf-bookshelf-book-shelves-simple-student-desktop-small-picture-book-storage-household-simple-floor-living-room-children-s-bookcase-durable-bookcase-display-shelf-storage-org.html |AND| __SHOPPING__/lorell-6-shelf-panel-bookcase-36-x-12-x-72-mahogany.html", + "geolocation": null, + "intent_template": "Add the product that {{attribute}} from my open tabs to the shopping cart", + "instantiation_dict": { + "attribute": "looks like an animal" + }, + "intent": "Add the product that looks like an animal from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": 
[ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Home Bookshelf Bookcase Shelf Bookshelf" + ] + } + } + ] + }, + "intent_template_id": 22, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 86, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add all the items I need to make this to my shopping cart.", + "intent": "Add all the items I need to make this to my shopping cart.", + "image": "coco_images/000000024567.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "hot dog", + "roll |OR| rolls |OR| bun" + ] + } + } + ] + }, + "intent_template_id": 23, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 87, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add all the items I need to make this to my shopping cart.", + "intent": "Add all the items I need to make this to my shopping cart.", + "image": "coco_images/000000029675.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "hot dog", + "roll |OR| rolls |OR| bun", + "mustard" + ] + } + } + ] + }, + "intent_template_id": 23, + "reasoning_difficulty": 
"hard", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 88, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/men/clothing.html", + "geolocation": null, + "intent_template": "Add something like {{item}}, but {{attribute}}, from this page to my wish list.", + "intent": "Add something like the shirt the person on the right is wearing, but in green, from this page to my wish list.", + "image": "coco_images/000000048396.jpg", + "instantiation_dict": { + "item": "the shirt the person on the right is wearing", + "attribute": "in green" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Men & Boy Long Sleeve Flannel Plaid Casual Shirts Button Down Regular Fit Long Sleeve Cotton Polo Shirt L-5XL" + ] + } + } + ] + }, + "intent_template_id": 24, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 89, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add something like the {{item}} to my shopping cart.", + "intent": "Add something like the thing at the top of this image to my shopping cart.", + "image": "coco_images/000000064574.jpg", + "instantiation_dict": { + "item": "thing at the top of this image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + 
"required_contents": { + "must_include": [ + "hair brush" + ] + } + } + ] + }, + "intent_template_id": 25, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 90, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add something like the {{item}} to my shopping cart.", + "intent": "Add something like the colorful thing in the sky to my shopping cart.", + "image": "coco_images/000000433515.jpg", + "instantiation_dict": { + "item": "colorful thing in the sky" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no kites available on OneStopMarket.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 25, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 91, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add something like the {{item}} to my shopping cart.", + "intent": "Add something like the thing the person is carrying (in the same color) to my shopping cart.", + "image": "coco_images/000000329041.jpg", + "instantiation_dict": { + "item": "thing the person is carrying (in the same color)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no red purses available on OneStopMarket.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 25, + "reasoning_difficulty": "medium", + 
"visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 92, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add something like the {{item}} to my shopping cart.", + "intent": "Add something like the blue one to my shopping cart.", + "image": "coco_images/000000440507.jpg", + "instantiation_dict": { + "item": "blue one" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no luggages available on OneStopMarket.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 25, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 93, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add something like the {{item}} to my shopping cart.", + "intent": "Add something like the one in the bottom right to my shopping cart.", + "image": "coco_images/000000324715.jpg", + "instantiation_dict": { + "item": "one in the bottom right" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "scissors" + ] + } + } + ] + }, + "intent_template_id": 25, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 94, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/clothing-shoes-jewelry/men/clothing.html?p=2", + "geolocation": null, + "intent_template": "I like {{attribute}}. Can you add something like that {{condition}} to my wishlist, if there's one?", + "intent": "I like what the person on the left is wearing. Can you add something like that from this page to my wishlist, if there's one?", + "image": "coco_images/000000066771.jpg", + "instantiation_dict": { + "attribute": "what the person on the left is wearing", + "condition": "from this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "IZOD Men's Advantage Performance Full Zip Fleece Jacket" + ] + } + } + ] + }, + "intent_template_id": 26, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 95, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?q=necktie&product_list_limit=36", + "geolocation": null, + "intent_template": "I like {{attribute}}. Can you add something like that {{condition}} to my wishlist, if there's one?", + "intent": "I like this person's outfit. 
Can you add something like that from this page in the same colors to my wishlist, if there's one?", + "image": "coco_images/000000163682.jpg", + "instantiation_dict": { + "attribute": "this person's outfit", + "condition": "from this page in the same colors" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Barry.Wang Men's Ties Set Silk Wedding Neckties Woven Silk Formal Business Casual(4/6/8PCS) |OR| Dress Shirts for Men Slim Fit, Classic Long Sleeve Solid Button Down Business Dress Shirt Wedding Party Top" + ] + } + } + ] + }, + "intent_template_id": 26, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 96, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "I need something like this for my apartment. Can you add one to my wishlist?", + "intent": "I need something like this for my apartment. 
Can you add one to my wishlist?", + "image": "coco_images/000000167240.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "vase" + ] + } + } + ] + }, + "intent_template_id": 27, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 97, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "I need something like this for my apartment. Can you add one to my wishlist?", + "intent": "I need something like this for my apartment. Can you add one to my wishlist?", + "image": "coco_images/000000176232.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "vase |OR| flower" + ] + } + } + ] + }, + "intent_template_id": 27, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 98, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "I need something like this for my apartment. Can you add one to my wishlist?", + "intent": "I need something like this for my apartment. 
Can you add one to my wishlist?", + "image": "coco_images/000000167353.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "clock" + ] + } + } + ] + }, + "intent_template_id": 27, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 99, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/office-products/office-furniture-lighting/chairs-sofas.html?product_list_limit=36", + "geolocation": null, + "intent_template": "I really like the {{item}}, but I'm not a fan of the color. Can you see if there's one in {{attribute}} on this page, and if so, add the cheapest one to my wishlist?", + "intent": "I really like the purple item, but I'm not a fan of the color. 
Can you see if there's one in brown on this page, and if so, add the cheapest one to my wishlist?", + "image": "coco_images/000000172595.jpg", + "instantiation_dict": { + "item": "purple item", + "attribute": "brown" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Big and Tall Office Chair Executive Chair Wide Seat High Back Ergonomic Computer Chair Home Office Desk Chair Heavy Duty PU Leather Task Chair with Lumbar Support Arms, Brown" + ] + } + } + ] + }, + "intent_template_id": 28, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 100, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "{{description}}, can you add one to my wishlist?", + "intent": "I like the shirt this person is wearing, can you add one to my wishlist?", + "image": "coco_images/000000173008.jpg", + "instantiation_dict": { + "description": "I like the shirt this person is wearing" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "hawaiian" + ] + } + } + ] + }, + "intent_template_id": 29, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 101, + "require_login": true, + "storage_state": 
"./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "{{description}}, can you add one to my wishlist?", + "intent": "I'd like to hang this on my christmas tree too, can you add one to my wishlist?", + "image": "coco_images/000000175443.jpg", + "instantiation_dict": { + "description": "I'd like to hang this on my christmas tree too" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "teddy" + ] + } + } + ] + }, + "intent_template_id": 29, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 102, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "{{description}}, can you add one to my wishlist?", + "intent": "I need one of what its wearing for the winter, can you add one to my wishlist?", + "image": "coco_images/000000286708.jpg", + "instantiation_dict": { + "description": "I need one of what its wearing for the winter" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "beanie" + ] + } + } + ] + }, + "intent_template_id": 29, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 103, + "require_login": true, + "storage_state": 
"./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "{{description}}, can you add one to my wishlist?", + "intent": "This looks fun and I'd like to buy this system too, can you add one to my wishlist?", + "image": "coco_images/000000324258.jpg", + "instantiation_dict": { + "description": "This looks fun and I'd like to buy this system too" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "wii" + ] + } + } + ] + }, + "intent_template_id": 29, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 104, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?p=2&q=dress", + "geolocation": null, + "intent_template": "{{description}}, can you add the most similar one on this page to my wishlist?", + "intent": "I love the color and style of this, can you add the most similar one on this page to my wishlist?", + "image": "coco_images/000000327601.jpg", + "instantiation_dict": { + "description": "I love the color and style of this" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "ANTS Women's Simple Short Bridesmaid Dress Chiffon Homecoming Dresses" + ] + } + } + ] + }, + "intent_template_id": 30, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", 
+ "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 105, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/men/shoes.html?product_list_limit=36", + "geolocation": null, + "intent_template": "{{description}}, can you add the most similar one on this page to my wishlist?", + "intent": "The material of this looks quite good, can you add the most similar one on this page to my wishlist?", + "image": "coco_images/000000399655.jpg", + "instantiation_dict": { + "description": "The material of this looks quite good" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Cole Haan Men's Lionel Dress Chukka Boot |OR| MBT Men's Kabisa Laceup Shoe |OR| Donald J Pliner Men's Loafer, Black, 10" + ] + } + } + ] + }, + "intent_template_id": 30, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 106, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/wall-art/posters-prints.html", + "geolocation": null, + "intent_template": "{{description}}, can you add the most similar one on this page to my wishlist?", + "intent": "I like this painting, can you add the most similar one on this page to my wishlist?", + "image": "coco_images/000000415238.jpg", + "instantiation_dict": { + "description": "I like this painting" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": 
"document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Wieco Art Sunflower by Vincent Van Gogh Oil Paintings Reproduction Modern Floral Giclee Canvas Prints Artwork Flowers Pictures on Canvas Wall Art for Home and Office Decorations |OR| African American Canvas Wall Art Black Woman With Butterflies Paintings Sexy Black arounded by Sunflower Canvas Paintings Afro Women Picture Black Queen Inspirational Wall Art Hanging Poster Painting Black Girl Photography Abstract Watercolor Sunflower Pa" + ] + } + } + ] + }, + "intent_template_id": 30, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 107, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "I need something to store {{item}}. Add one to my shopping cart.", + "intent": "I need something to store the item on the right. 
Add one to my shopping cart.", + "image": "coco_images/000000444275.jpg", + "instantiation_dict": { + "item": "the item on the right" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "microwave", + "storage |OR| shelf |OR| rack" + ] + } + } + ] + }, + "intent_template_id": 31, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "__SHOPPING__/xxjf-microwave-rack-stainless-steel-2-tier-microwave-oven-rack-kitchen-storage-organiser-cabinet-printer-shelf-bakers-rack.html", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 108, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add something like {{attribute}} to my wish list.", + "intent": "Add something like what the man is wearing to my wish list.", + "image": "coco_images/000000024567.jpg", + "instantiation_dict": { + "attribute": "what the man is wearing" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "__SHOPPING__/wishlist/", + "eval_image_class": ".products-grid .wishlist .product-image-photo", + "eval_vqa": [ + { + "question": "Is this a polo shirt? (yes/no)", + "answer": "yes" + }, + { + "question": "Is this shirt green? 
(yes/no)", + "answer": "yes" + } + ] + } + ], + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 32, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 109, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you add {{items}} to the comparison page?", + "intent": "Can you add these items to the comparison page?", + "image": "coco_images/000000348012.jpg", + "instantiation_dict": { + "items": "these items" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/catalog/product_compare", + "locator": "", + "required_contents": { + "must_include": [ + "scissors", + "tape" + ] + } + } + ] + }, + "intent_template_id": 33, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 110, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add this exact product to my shopping cart. I think it is in the \"{{category}}\" category.", + "intent": "Add this exact product to my shopping cart. 
I think it is in the \"Smartwatch Accessories\" category.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B09CGLFYKT.0.jpg", + "instantiation_dict": { + "category": "Smartwatch Accessories" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Fullmosa Compatible with Apple Watch Band 44mm with Bumper Case, Rugged iWatch Band with Screen Protector Only for Apple Watch Series 6/5/4/SE, Orange" + ] + } + } + ] + }, + "intent_template_id": 34, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "need to process exact image", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 111, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add this exact product to my shopping cart. I think it is in the \"{{category}}\" category.", + "intent": "Add this exact product to my shopping cart. 
I think it is in the \"Herbs, Spices & Seasonings\" category.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B00LSYQ87G.0.jpg", + "instantiation_dict": { + "category": "Herbs, Spices & Seasonings" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Lawry's Chipotle Cinnamon Rub, 27 oz - One 27 Ounce Container of Chipotle Cinnamon Rub Made of Chipotle Chili Pepper, Cinnamon, and Paprika Perfect for Burgers, Pork, Chicken, and Vegetables" + ] + } + } + ] + }, + "intent_template_id": 34, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "need to process exact image", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 112, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add this exact product to my shopping cart. I think it is in the \"{{category}}\" category.", + "intent": "Add this exact product to my shopping cart. 
I think it is in the \"Dining & Entertaining\" category.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B01M0HT1AW.0.jpg", + "instantiation_dict": { + "category": "Dining & Entertaining" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Reston Lloyd Plastic Tumbler Corelle Coordinates, Timber Shadows, 8oz Acrylic Drinkware, Set of 6, Juice, Clear" + ] + } + } + ] + }, + "intent_template_id": 34, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "need to process exact image", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 113, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add this exact product to my shopping cart. I think it is in the \"{{category}}\" category.", + "intent": "Add this exact product to my shopping cart. 
I think it is in the \"Women Accessories\" category.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B093YS65BV.0.jpg", + "instantiation_dict": { + "category": "Women Accessories" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Set of 2 Mesh Laundry Bags Cute Orange Cat-1 Medium & 1 Small Bags Laundry,Blouse, Hosiery, Stocking, Underwear, Bra Lingerie, Travel Laundry Bag(8rp9k)" + ] + } + } + ] + }, + "intent_template_id": 34, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "need to process exact image, need to paginate to second page", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 114, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add this exact product to my shopping cart. I think it is in the \"{{category}}\" category.", + "intent": "Add this exact product to my shopping cart. 
I think it is in the \"Makeup\" category.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B08GHD7MWP.0.jpg", + "instantiation_dict": { + "category": "Makeup" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Petansy 3 Packs Aloe Vera Lipstick , Lips Moisturizer Long Lasting Nutritious Lip Balm Magic Temperature Color Change Lip Gloss-Set(A)" + ] + } + } + ] + }, + "intent_template_id": 34, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "need to process exact image", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 115, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add this exact product to my shopping cart. I think it is in the \"{{category}}\" category.", + "intent": "Add this exact product to my shopping cart. 
I think it is in the \"Patio Furniture & Accessories\" category.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B07CZHLNS2.0.jpg", + "instantiation_dict": { + "category": "Patio Furniture & Accessories" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Modway EEI-2923-GRY-GRY Aura Outdoor Patio Wicker Rattan Sofa Gray" + ] + } + } + ] + }, + "intent_template_id": 34, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "need to process exact image, need to paginate to second page", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 116, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add this exact product to my wish list. I think it might be in the \"{{category}}\" category but I'm not certain.", + "intent": "Add this exact product to my wish list. 
I think it might be in the \"PlayStation 4\" category but I'm not certain.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B075LFHQCV.0.jpg", + "instantiation_dict": { + "category": "PlayStation 4" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Nintendo Switch Pro Controller Xenoblade 2 Edition (Japan Import)" + ] + } + } + ] + }, + "intent_template_id": 35, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "need to process exact image, product is actually in the Nintendo Switch category", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 117, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add this exact product to my wish list. I think it might be in the \"{{category}}\" category but I'm not certain.", + "intent": "Add this exact product to my wish list. 
I think it might be in the \"Office Furniture & Lighting > Chairs & Sofas\" category but I'm not certain.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B00QGY0PQS.0.jpg", + "instantiation_dict": { + "category": "Office Furniture & Lighting > Chairs & Sofas" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Flash Furniture Low Back Designer Armless White Ribbed Swivel Task Office Chair" + ] + } + } + ] + }, + "intent_template_id": 35, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "need to process exact image, product is actually in the Home > Home & Kitchen > Furniture > Home Office Furniture category", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 118, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add this exact product to my wish list. I think it might be in the \"{{category}}\" category but I'm not certain.", + "intent": "Add this exact product to my wish list. 
I think it might be in the \"Men > Clothing\" category but I'm not certain.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B096YB95CB.0.jpg", + "instantiation_dict": { + "category": "Men > Clothing" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "INTIMO Elf The Movie Men's Cotton Headed Ninny Muggins Allover Holiday Christmas Film Loungewear Pajama Pants" + ] + } + } + ] + }, + "intent_template_id": 35, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "need to process exact image, product is actually in the Novelty & More category", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 119, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add this exact product to my wish list. I think it might be in the \"{{category}}\" category but I'm not certain.", + "intent": "Add this exact product to my wish list. 
I think it might be in the \"Home D\u00e9cor Products\" category but I'm not certain.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B08LD6T579.0.jpg", + "instantiation_dict": { + "category": "Home D\u00e9cor Products" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "ELUZE 4-Light Bathroom Vanity Light Black Vintage Bathroom Lighting with Clear Glass Shades Wall Sconce Lighting Fixtures for Bathroom Kitchen Hallway" + ] + } + } + ] + }, + "intent_template_id": 35, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "need to process exact image, product is actually in the Lighting & Ceiling Fans category", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 120, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add this exact product to my wish list. I think it might be in the \"{{category}}\" category but I'm not certain.", + "intent": "Add this exact product to my wish list. 
I think it might be in the \"Home D\u00e9cor Products\" category but I'm not certain.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B08Y7N7Z7V.0.jpg", + "instantiation_dict": { + "category": "Home D\u00e9cor Products" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Dutsekk LED Bathroom Mirror Anti-Fog - 36x28 Inch Dimmable IP44 Waterproof Bathroom Mirror with Lights - Lighted Bathroom Vanity Mirror for Wall with CCT Adjustable for Makeup" + ] + } + } + ] + }, + "intent_template_id": 35, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "need to process exact image, product is actually in the Bathroom Accessories category", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 121, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Tell me how much this exact item costs on OneStopMarket.", + "intent": "Tell me how much this exact item costs on OneStopMarket.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B06XKJNJJZ.0.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "501.25" + ] + }, + "reference_url": "__SHOPPING__/sandisk-extreme-pro-512-gb-cfast-card-model-sdcfsp-512g-a46d.html", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$501.25" + }, + "intent_template_id": 36, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "need to process exact image", + "overall_difficulty": 
"hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 122, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Tell me how much this exact item costs on OneStopMarket.", + "intent": "Tell me how much this exact item costs on OneStopMarket.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B07TZD4H27.0.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "126.65" + ] + }, + "reference_url": "__SHOPPING__/martha-stewart-herbal-botany-wall-art-living-room-decor-floral-linen-canvas-home-accent-country-lifestyle-bathroom-decoration-ready-to-hang-poster-painting-for-bedroom-17-84-x-21-84-green.html", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$126.65" + }, + "intent_template_id": 36, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "need to process exact image", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 123, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Tell me how much this exact item costs on OneStopMarket.", + "intent": "Tell me how much this exact item costs on OneStopMarket.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B08TC17K6D.0.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "328.77" + ] + }, + "reference_url": "__SHOPPING__/skyinbags-hand-painted-abstract-plant-tree-oil-painting-on-canvas-large-picture-wall-artwork-modern-art-poster-decoration-for-corridor-live-room-home-decor-gift-90x130cm-unframed9.html", + "program_html": [], + 
"string_note": "", + "reference_answer_raw_annotation": "$328.77" + }, + "intent_template_id": 36, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "need to process exact image", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 124, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Tell me how much this exact item costs on OneStopMarket.", + "intent": "Tell me how much this exact item costs on OneStopMarket.", + "image": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B002IVVD4G.0.jpg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "645.00" + ] + }, + "reference_url": "__SHOPPING__/nec-np4100-6200-lumen-xga-dlp-projector.html", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$645.00" + }, + "intent_template_id": 36, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "need to process exact image", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 125, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Tell me how much this exact item costs on OneStopMarket.", + "intent": "Tell me how much this exact item costs on OneStopMarket.", + "image": "https://images.pexels.com/photos/1152077/pexels-photo-1152077.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "This exact item is not available on OneStopMarket.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 36, + 
"reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "comments": "need to process exact image", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 126, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Tell me how much this exact item costs on OneStopMarket.", + "intent": "Tell me how much this exact item costs on OneStopMarket.", + "image": "https://images.pexels.com/photos/904620/pexels-photo-904620.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "This exact item is not available on OneStopMarket.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 36, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "comments": "need to process exact image", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 127, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/men/clothing.html", + "geolocation": null, + "intent_template": "I'm interested in the {{item}}. Can you return an image link showing {{image_desc}}?", + "intent": "I'm interested in the ugly christmas sweater. 
Can you return an image link showing how it looks like from the back?", + "image": null, + "instantiation_dict": { + "item": "ugly christmas sweater", + "image_desc": "how it looks like from the back" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B00NIN2O28.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B00NIN2O28.1.jpg" + }, + "intent_template_id": 37, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "comments": "hard: need to process image, look at images on the product page", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 128, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/women/clothing.html", + "geolocation": null, + "intent_template": "I'm interested in the {{item}}. Can you return an image link showing {{image_desc}}?", + "intent": "I'm interested in the nightmare before christmas sweatshirt. 
Can you return an image link showing how it looks like from the back?", + "image": null, + "instantiation_dict": { + "item": "nightmare before christmas sweatshirt", + "image_desc": "how it looks like from the back" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "Such an image does not exist on the page.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 37, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "comments": "hard: need to process image, look at images on the product page", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 129, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/cell-phones-accessories/cases-holsters-sleeves/flip-cases.html", + "geolocation": null, + "intent_template": "I'm interested in the {{item}}. Can you return an image link showing {{image_desc}}?", + "intent": "I'm interested in the pink case. 
Can you return an image link showing how it looks like from the back?", + "image": null, + "instantiation_dict": { + "item": "pink case", + "image_desc": "how it looks like from the back" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "Such an image does not exist on the page.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 37, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "comments": "hard: need to process image, look at images on the product page", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 130, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/women/clothing.html?p=3", + "geolocation": null, + "intent_template": "I'm interested in the {{item}}. Can you return an image link showing {{image_desc}}?", + "intent": "I'm interested in the pink hoodie. 
Can you return an image link showing how it looks like from the back?", + "image": null, + "instantiation_dict": { + "item": "pink hoodie", + "image_desc": "how it looks like from the back" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B073ZB2V56.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B073ZB2V56.1.jpg" + }, + "intent_template_id": 37, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 131, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/furniture/living-room-furniture.html", + "geolocation": null, + "intent_template": "I'm interested in the {{item}}. Can you return an image link showing {{image_desc}}?", + "intent": "I'm interested in the coffee table in the first row. 
Can you return an image link showing how the material looks like close up?", + "image": null, + "instantiation_dict": { + "item": "coffee table in the first row", + "image_desc": "how the material looks like close up" + }, + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B09KX1J3KW.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09KX1J3KW.1.jpg" + }, + "intent_template_id": 37, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 132, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/patio-lawn-garden/patio-furniture-accessories.html?p=2", + "geolocation": null, + "intent_template": "I'm interested in the {{item}}. Can you return an image link showing {{image_desc}}?", + "intent": "I'm interested in the colorful item in the first row. 
Can you return an image link showing how it might look if I put my feet up on it?", + "image": null, + "instantiation_dict": { + "item": "colorful item in the first row", + "image_desc": "how it might look if I put my feet up on it" + }, + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B07Q7PLPKN.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07Q7PLPKN.1.jpg" + }, + "intent_template_id": 37, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 133, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sports-outdoors/exercise-fitness.html", + "geolocation": null, + "intent_template": "I'm interested in the {{item}}. Can you return an image link showing {{image_desc}}?", + "intent": "I'm interested in the colorful item in the first row. 
Can you return an image link showing how it might look when worn?", + "image": null, + "instantiation_dict": { + "item": "colorful item in the first row", + "image_desc": "how it might look when worn" + }, + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B09PF55P17.0.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09PF55P17.0.jpg" + }, + "intent_template_id": 37, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 134, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sports-outdoors/sports.html", + "geolocation": null, + "intent_template": "I'm interested in the {{item}}. Can you return an image link showing {{image_desc}}?", + "intent": "I'm interested in the boots in the first row. 
Can you return an image link showing how it might look when worn?", + "image": null, + "instantiation_dict": { + "item": "boots in the first row", + "image_desc": "how it might look when worn" + }, + "require_reset": false, + "viewport_size": { + "width": 1280 + }, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B09P1CJTD9.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09P1CJTD9.1.jpg" + }, + "intent_template_id": 37, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 135, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/schlumbergera-hybrid-zygocactus-christmas-cactus-plant-3-5-inch-pot-1-plant.html", + "geolocation": null, + "intent_template": "Return an image link of an image showing how this item looks like {{attribute}}.", + "intent": "Return an image link of an image showing how this item looks like when it is flowering.", + "image": null, + "instantiation_dict": { + "attribute": "when it is flowering" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B07W88SQCT.0.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07W88SQCT.0.jpg" + }, + "intent_template_id": 38, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 136, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/ambesonne-japanese-duvet-cover-set-japanese-traditional-garden-design-wildlife-forest-meditation-origami-decorative-3-piece-bedding-set-with-2-pillow-shams-queen-size-black-and-white.html", + "geolocation": null, + "intent_template": "Return an image link of an image showing how this item looks like {{attribute}}.", + "intent": "Return an image link of an image showing how this item looks like from above.", + "image": null, + "instantiation_dict": { + "attribute": "from above" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B073Q36J45.0.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B073Q36J45.0.jpg" + }, + "intent_template_id": 38, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 137, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Return an image link of {{item}}.", + "intent": "Return an image link of the cheapest Canon printer from the \"Printers & Accessories\" category which shows it printing something.", + "image": null, + "instantiation_dict": { + "item": "the cheapest Canon printer from the \"Printers & Accessories\" category which shows it printing something" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B005TI2Q6O.0.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B005TI2Q6O.0.jpg" + }, + "intent_template_id": 39, + "reasoning_difficulty": "hard", + 
"visual_difficulty": "hard", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 138, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/patio-lawn-garden/gardening-lawn-care/plants-seeds-bulbs.html", + "geolocation": null, + "intent_template": "Return an image link of {{item}}.", + "intent": "Return an image link of the most highly reviewed item on this page.", + "image": null, + "instantiation_dict": { + "item": "the most highly reviewed item on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B07SSFGC5S.0.jpg |OR| B07SSFGC5S.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07SSFGC5S.0.jpg" + }, + "intent_template_id": 39, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 139, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/breads-bakery/cookies.html", + "geolocation": null, + "intent_template": "Return an image link from one of the products here showing {{item}}.", + "intent": "Return an image link from one of the products here showing a close up of some cookies, if it exists.", + "image": null, + "instantiation_dict": { + "item": "a close up of some cookies, if it exists" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B08FP4B8N9.1.jpg |OR| B07GD7RHMV.1.jpg |OR| B0719MJ5H7.0.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": 
"__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B08FP4B8N9.1.jpg" + }, + "intent_template_id": 40, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 140, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/oral-care/toothbrushes-accessories.html", + "geolocation": null, + "intent_template": "Return an image link from one of the products here showing {{item}}.", + "intent": "Return an image link from one of the products here showing a single blue toothbrush, if it exists.", + "image": null, + "instantiation_dict": { + "item": "a single blue toothbrush, if it exists" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B086LJX6TL.1.jpg |OR| B009OCJBN2.0.jpg |OR| B009OCJBN2.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B086LJX6TL.1.jpg" + }, + "intent_template_id": 40, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 141, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/oral-care/toothbrushes-accessories.html", + "geolocation": null, + "intent_template": "The panda toothbrush box is cute, can you return an image link showing the design with other animals if possible?", + "intent": "The panda toothbrush box is cute, can you return an image link showing the design with other animals if possible?", + "image": null, + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + 
"reference_answers": { + "must_include": [ + "B09BCV4NS1.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09BCV4NS1.1.jpg" + }, + "intent_template_id": 41, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 142, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/xbox-one.html", + "geolocation": null, + "intent_template": "I'm interested in the {{item}}. Can you return an image link showing {{image_desc}}?", + "intent": "I'm interested in the cheaper pink controller. Can you return an image link showing how it looks like from another angle?", + "image": null, + "instantiation_dict": { + "item": "cheaper pink controller", + "image_desc": "how it looks like from another angle" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B07Q4R3X1B.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07Q4R3X1B.1.jpg or __SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B074W9FQC2.1.jpg" + }, + "intent_template_id": 37, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 143, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/skin-care/face.html", + "geolocation": null, + "intent_template": "I'd like to share the {{item}} with my friend, can you return an image link {{description}}?", + "intent": "I'd like to 
share the third item with my friend, can you return an image link of an image summarizing its benefits?", + "image": null, + "instantiation_dict": { + "item": "third item", + "description": "of an image summarizing its benefits" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B07YZTN7TX.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07YZTN7TX.1.jpg" + }, + "intent_template_id": 42, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "comments": "need to do OCR", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 144, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/furniture/living-room-furniture.html", + "geolocation": null, + "intent_template": "I'd like to share the {{item}} with my friend, can you return an image link {{description}}?", + "intent": "I'd like to share the second item with my friend, can you return an image link without the TV (just the stand)?", + "image": null, + "instantiation_dict": { + "item": "second item", + "description": "without the TV (just the stand)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B07MG5G5B5.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07MG5G5B5.1.jpg" + }, + "intent_template_id": 42, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 145, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + 
"start_url": "__SHOPPING__/outdoor-patio-folding-side-table-square-metal-end-table-portable-small-bistro-coffee-table-green.html", + "geolocation": null, + "intent_template": "Can you return the link of an image {{description}}?", + "intent": "Can you return the link of an image highlighting the features of this table?", + "image": null, + "instantiation_dict": { + "description": "highlighting the features of this table" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B07FM3WKJ8.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07FM3WKJ8.1.jpg" + }, + "intent_template_id": 43, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 146, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/milwaukee-pretzel-company-authentic-bavarian-style-gourmet-soft-pretzel-fresh-baked-and-shipped-16-oz-1-lb-bavarian-beast-pretzel-pack-of-2.html", + "geolocation": null, + "intent_template": "Can you return the link of an image {{description}}?", + "intent": "Can you return the link of an image illustrating the dimensions of the pretzel?", + "image": null, + "instantiation_dict": { + "description": "illustrating the dimensions of the pretzel" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B079B43M9T.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B079B43M9T.1.jpg" + }, + "intent_template_id": 43, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", 
+ "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 147, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/crestlive-products-dresser-storage-drawer-organizer-fabric-dresser-for-bedroom-living-room-entryway-closets-easy-pull-fabric-bins-wood-top-mixed-color.html", + "geolocation": null, + "intent_template": "Can you return the link of an image {{description}}?", + "intent": "Can you return the link of an image illustrating the dimensions of the dresser?", + "image": null, + "instantiation_dict": { + "description": "illustrating the dimensions of the dresser" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "Such an image does not exist on the page.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 43, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 148, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/dave-s-killer-bread-good-seed-4-loaves-usda-organic.html", + "geolocation": null, + "intent_template": "Can you return the link of an image {{description}}?", + "intent": "Can you return the link of an image showing a close up of the bread (without the packaging)?", + "image": null, + "instantiation_dict": { + "description": "showing a close up of the bread (without the packaging)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "Such an image does not exist on the page.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 43, + "reasoning_difficulty": 
"easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 149, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/dave-s-killer-bread-good-seed-4-loaves-usda-organic.html", + "geolocation": null, + "intent_template": "Can you return the link of an image {{description}}?", + "intent": "Can you return the link of an image showing the nutritional information of this?", + "image": null, + "instantiation_dict": { + "description": "showing the nutritional information of this" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B00U10YS7K.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B00U10YS7K.1.jpg" + }, + "intent_template_id": 43, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 150, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I remembered that I ordered something like this, can you show me the corresponding order page?", + "intent": "I remembered that I ordered something like this, can you show me the corresponding order page?", + "image": "https://images.pexels.com/photos/12021852/pexels-photo-12021852.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/sales/order/view/order_id/161/", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 44, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + 
"comments": "pg1. __SHOPPING__/wangliwer-mini-wireless-bluetooth-speaker-with-mic-tf-card-metal-portable-subwoof-sound-fm-radio-aux-mp3-music-plays-loudspeaker-portable-speaker-color-pink.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 151, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I remembered that I ordered something like this, can you show me the corresponding order page?", + "intent": "I remembered that I ordered something like this, can you show me the corresponding order page?", + "image": "https://images.pexels.com/photos/932638/pexels-photo-932638.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/sales/order/view/order_id/157/", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 44, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "pg1. 
__SHOPPING__/photo-backgrounds-blue-sea-background-for-shiny-underwater-photography-pea-bulb-scenic-party-decor-photo-decorations-photocall-photo-studio-5x3ft.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 152, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I remembered that I ordered something like this, can you show me the corresponding order page?", + "intent": "I remembered that I ordered something like this, can you show me the corresponding order page?", + "image": "https://images.pexels.com/photos/18541045/pexels-photo-18541045/free-photo-of-standing-giraffes-in-glass-lens-filter.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/sales/order/view/order_id/165/", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 44, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "pg3. 
__SHOPPING__/37mm-cpl-polarizing-lens-filter-portable-polarizer-camera-lens-mobile-phone-clip-designed-polarizer-lens-filter-improve-color-saturation-and-contrast-for-eliminating-or-reducing-light-spots.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 153, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} of the same brand as the product in this picture.", + "intent": "Find me a game console of the same brand as the product in this picture.", + "image": "https://images.pexels.com/photos/5626726/pexels-photo-5626726.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "category": "a game console" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_product_attributes(__page__, 'manufacturer |OR| brand name')", + "required_contents": { + "must_include": [ + "Microsoft" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".fotorama__stage__frame", + "eval_vqa": [ + { + "question": "Is this a game console? 
(yes/no)", + "answer": "yes" + } + ] + } + ] + }, + "intent_template_id": 45, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/microsoft-xbox-one-console-with-accessories-1tb-hdd-black.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 154, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} of the same brand as the product in this picture.", + "intent": "Find me a set of headphones of the same brand as the product in this picture.", + "image": "https://images.pexels.com/photos/45889/camera-photo-camera-sony-alpha-7-sony-45889.jpeg", + "instantiation_dict": { + "category": "a set of headphones" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_product_attributes(__page__, 'manufacturer |OR| brand name')", + "required_contents": { + "must_include": [ + "Sony" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".fotorama__stage__frame", + "eval_vqa": [ + { + "question": "Is this a headphone? (yes/no)", + "answer": "yes" + } + ] + } + ] + }, + "intent_template_id": 45, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 155, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} of the same brand as the product in this picture. It needs to {{description}}.", + "intent": "Find me a set of headphones of the same brand as the product in this picture. 
It needs to be silver colored and have at least 10 reviews with an average rating greater than 4.", + "image": "https://images.pexels.com/photos/1528851/pexels-photo-1528851.jpeg", + "instantiation_dict": { + "category": "a set of headphones", + "description": "be silver colored and have at least 10 reviews with an average rating greater than 4" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/sony-wh1000xm3-bluetooth-wireless-noise-canceling-headphones-silver-wh-1000xm3-s-renewed.html |OR| __SHOPPING__/sony-h900n-hi-res-noise-cancelling-wireless-headphone-grayish-black-renewed.html |OR| __SHOPPING__/sony-mdrx10-red-simon-cowell-x-headphones-with-50mm-diaphragms.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 46, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 156, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} of the same brand as the product in this picture. It needs to {{description}}.", + "intent": "Find me a printer of the same brand as the product in this picture. 
It needs to be white and have at least 11 reviews with an average rating greater than 4.", + "image": "https://images.pexels.com/photos/7639358/pexels-photo-7639358.jpeg", + "instantiation_dict": { + "category": "a printer", + "description": "be white and have at least 11 reviews with an average rating greater than 4" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/hp-deskjet-f380-all-in-one-printer-scanner-copier.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 46, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 157, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} of the same brand as the product in this picture. It needs to {{description}}.", + "intent": "Find me protein bars of the same brand as the product in this picture. 
It needs to have price between $30 and $50.", + "image": "https://images.pexels.com/photos/12709747/pexels-photo-12709747.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "category": "protein bars", + "description": "have price between $30 and $50" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/kellogg-s-special-k-protein-bars-meal-replacement-protein-snacks-value-size-chocolatey-chip-cookie-dough-19oz-box-12-bars.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 46, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 158, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} that is of the same brand as products in one of the following pictures. It needs to {{description}}.", + "intent": "Find me a digital camera that is of the same brand as products in one of the following pictures. It needs to be black and have at least 12 reviews. Furthermore, it cannot be too expensive - it needs to have price less than $700.", + "image": [ + "https://images.pexels.com/photos/1022187/pexels-photo-1022187.jpeg", + "https://images.pexels.com/photos/89551/pexels-photo-89551.jpeg" + ], + "instantiation_dict": { + "category": "a digital camera", + "description": "be black and have at least 12 reviews. 
Furthermore, it cannot be too expensive - it needs to have price less than $700" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_num_reviews(__page__)", + "required_contents": { + "required_values": [ + ">= 12" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_product_price(__page__)", + "required_contents": { + "required_values": [ + "< 700" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_product_attributes(__page__, 'manufacturer |OR| brand name')", + "required_contents": { + "must_include": [ + "Olympus |OR| Nikon" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".fotorama__stage__frame", + "eval_vqa": [ + { + "question": "Is this a digital camera? (yes/no)", + "answer": "yes" + }, + { + "question": "Is the color of the product black? 
(yes/no)", + "answer": "yes" + } + ] + } + ] + }, + "intent_template_id": 47, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/olympus-om-d-e-m1-16mp-mirrorless-digital-camera-with-3-inch-lcd-body-only-silver-w-black-trim.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 159, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add something like {{attribute}} to my wish list.", + "intent": "Add something like what the person is covered with in a similar color to my wish list.", + "image": "https://images.pexels.com/photos/269141/pexels-photo-269141.jpeg", + "instantiation_dict": { + "attribute": "what the person is covered with in a similar color" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "__SHOPPING__/wishlist/", + "eval_image_class": ".products-grid .wishlist .product-image-photo", + "eval_vqa": [ + { + "question": "Is this a blanket? (yes/no)", + "answer": "yes" + }, + { + "question": "Is this blanket light blue? 
(yes/no)", + "answer": "yes" + } + ] + } + ], + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 32, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "__SHOPPING__/wellber-extra-soft-fuzzy-faux-fur-throw-blanket-reversible-decorative-fluffy-sherpa-blanket-lightweight-cozy-microfiber-plush-furry-shaggy-fleece-blanket-for-couch-sofa-bed-50-x60-ice-blue.html", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 160, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add something like {{attribute}} to my wish list.", + "intent": "Add something like what the girl is holding to my wish list.", + "image": "https://images.pexels.com/photos/5614386/pexels-photo-5614386.jpeg", + "instantiation_dict": { + "attribute": "what the girl is holding" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "__SHOPPING__/wishlist/", + "eval_image_class": ".products-grid .wishlist .product-image-photo", + "eval_vqa": [ + { + "question": "Is this a cookie? 
(yes/no)", + "answer": "yes" + } + ] + } + ], + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 32, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 161, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/wall-art/posters-prints.html", + "geolocation": null, + "intent_template": "Add something like {{item}}, but {{attribute}}, from this page to my wish list.", + "intent": "Add something like the picture shown, but in sketching style, from this page to my wish list.", + "image": "https://images.pexels.com/photos/1054672/pexels-photo-1054672.jpeg", + "instantiation_dict": { + "item": "the picture shown", + "attribute": "in sketching style" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Yelash Cute Elephant Canvas Wall Art Colorful Dots and Sketch Elephant Picture Print for Kids Room Bathroom Poster Framed Ready to Hang (12\"x16\"x1 Panel)" + ] + } + } + ] + }, + "intent_template_id": 24, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "__SHOPPING__/yelash-cute-elephant-canvas-wall-art-colorful-dots-and-sketch-elephant-picture-print-for-kids-room-bathroom-poster-framed-ready-to-hang-12-x16-x1-panel.html", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 162, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/cell-phones-accessories/cases-holsters-sleeves/basic-cases.html?p=2", + "geolocation": null, 
+ "intent_template": "Add something like {{item}}, but {{attribute}}, from this page to my wish list.", + "intent": "Add something like what is used to protect the phone in the picture, but one with a ring kickstand, from this page to my wish list.", + "image": "https://images.pexels.com/photos/3392232/pexels-photo-3392232.jpeg", + "instantiation_dict": { + "item": "what is used to protect the phone in the picture", + "attribute": "one with a ring kickstand" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "KSELF for iPhone 11 Case with [2 Pack] Tempered Glass Screen Protector, Hard PC Slim Cover with Ring Kickstand, Full Protective Shockproof Dual Layer Hybrid Bumper Case for iPhone 11 6.1 inch (Blue)" + ] + } + } + ] + }, + "intent_template_id": 24, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/kself-for-iphone-11-case-with-2-pack-tempered-glass-screen-protector-hard-pc-slim-cover-with-ring-kickstand-full-protective-shockproof-dual-layer-hybrid-bumper-case-for-iphone-11-6-1-inch-blue.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 163, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add something like {{attribute}} to my wish list.", + "intent": "Add something like this to my wish list.", + "image": "https://images.pexels.com/photos/837267/pexels-photo-837267.jpeg", + "instantiation_dict": { + "attribute": "this" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "__SHOPPING__/wishlist/", + "eval_image_class": 
".products-grid .wishlist .product-image-photo", + "eval_vqa": [ + { + "question": "Is this a cactus? (yes/no)", + "answer": "yes" + }, + { + "question": "Is this a potted plant? (yes/no)", + "answer": "yes" + } + ] + } + ], + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 32, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "__SHOPPING__/artificial-cactus-fake-cactus-16-faux-cacti-plants-for-home-garden-office-store-decoration.html", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 164, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} that is of the same brand as products in one of the following pictures. It needs to {{description}}.", + "intent": "Find me a television that is of the same brand as products in one of the following pictures. 
It needs to have price at least $600.", + "image": [ + "https://images.pexels.com/photos/1447254/pexels-photo-1447254.jpeg", + "https://images.pexels.com/photos/3066/smartphone-mockup-mobile-phone-screen.jpg" + ], + "instantiation_dict": { + "category": "a television", + "description": "have price at least $600" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_product_price(__page__)", + "required_contents": { + "required_values": [ + ">= 600" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_product_attributes(__page__, 'brand name |OR| manufacturer')", + "required_contents": { + "must_include": [ + "LG |OR| SAMSUNG" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".fotorama__stage__frame", + "eval_vqa": [ + { + "question": "Is this a television? (yes/no)", + "answer": "yes" + } + ] + } + ] + }, + "intent_template_id": 47, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/lg-50nano80upa-50-nanocell-4k-nano80-series-smart-ultra-hd-tv-with-an-lg-sn6y-3-1-channel-dts-virtual-high-resolution-soundbar-and-subwoofer-2021.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 165, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} that is of the same brand as products in one of the following pictures. It needs to {{description}}.", + "intent": "Find me a men's tennis shoe that is of the same brand as products in one of the following pictures. 
It needs to have price between $60 and $120 with at least 11 reviews.", + "image": [ + "https://images.pexels.com/photos/1383775/pexels-photo-1383775.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "https://images.pexels.com/photos/6748322/pexels-photo-6748322.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1" + ], + "instantiation_dict": { + "category": "a men's tennis shoe", + "description": "have price between $60 and $120 with at least 11 reviews" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_product_price(__page__)", + "required_contents": { + "required_values": [ + ">= 60", + "<= 120" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_num_reviews(__page__)", + "required_contents": { + "required_values": [ + ">= 11" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_product_attributes(__page__, 'manufacturer |OR| brand name')", + "required_contents": { + "must_include": [ + "Asics |OR| Nike" + ] + } + }, + { + "url": "last", + "locator": "lambda:(() => { try { return document.querySelector('#maincontent > div.page-title-wrapper.product > h1 > span').textContent } catch (e) { return '' }})()", + "required_contents": { + "must_include": [ + "Men", + "Tennis Shoe" + ] + } + } + ] + }, + "intent_template_id": 47, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/asics-men-s-gel-resolution-8-tennis-shoes.html, __SHOPPING__/nike-men-s-court-lite-2-tennis-shoe.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 166, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} that {{relation1}} first picture and has {{relation2}} second picture. 
It needs to have {{relation3}} third picture.", + "intent": "Find me a shoe that is designed for the sport depicted in the first picture and has the same brand as products in the second picture. It needs to have the same color as the flower in the third picture.", + "image": [ + "https://images.pexels.com/photos/2291004/pexels-photo-2291004.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "https://images.pexels.com/photos/3281608/pexels-photo-3281608.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "https://images.pexels.com/photos/112398/pexels-photo-112398.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1" + ], + "instantiation_dict": { + "category": "a shoe", + "relation1": "is designed for the sport depicted in the", + "relation2": "the same brand as products in the", + "relation3": "the same color as the flower in the" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_product_attributes(__page__, 'manufacturer |OR| brand name')", + "required_contents": { + "must_include": [ + "adidas" + ] + } + }, + { + "url": "last", + "locator": "lambda:(() => { try { return document.querySelector('#maincontent > div.page-title-wrapper.product > h1 > span').textContent } catch (e) { return '' }})()", + "required_contents": { + "must_include": [ + "Basketball", + "Sneaker |OR| Shoe" + ] + } + } + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".fotorama__stage__frame", + "eval_vqa": [ + { + "question": "Is the shoe red? 
(yes/no)", + "answer": "yes" + } + ] + } + ] + }, + "intent_template_id": 48, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/adidas-mens-harden-be-x-basketball-sneakers-shoes-casual-red.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 167, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} that {{relation1}} first picture and has {{relation2}} second picture. It needs to have {{relation3}} third picture.", + "intent": "Find me a shoe that is designed for the sport depicted in the first picture and has the same brand as products in the second picture. It needs to have the same color as the third picture.", + "image": [ + "https://images.pexels.com/photos/5739218/pexels-photo-5739218.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "https://images.pexels.com/photos/7154759/pexels-photo-7154759.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "https://images.pexels.com/photos/4098504/pexels-photo-4098504.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1" + ], + "instantiation_dict": { + "category": "a shoe", + "relation1": "is designed for the sport depicted in the", + "relation2": "the same brand as products in the", + "relation3": "the same color as the" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "Such an item is not available on OneStopMarket.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 48, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 168, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + 
"geolocation": null, + "intent_template": "I really like {{stuff}}. Can you find me {{product}} and add it to my shopping cart?", + "intent": "I really like this drink. Can you find me mochi that have flavor of this drink and add it to my shopping cart?", + "image": [ + "https://images.pexels.com/photos/6413654/pexels-photo-6413654.jpeg" + ], + "instantiation_dict": { + "stuff": "this drink", + "product": "mochi that have flavor of this drink" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart/", + "locator": "lambda:(() => { try { return Array.from(document.querySelectorAll('.item-info')).find(el => el.textContent.includes('Royal Family 2 Packs of Mini Mochi Individually Wrapped- japenese taiwanese asia sweet rice wagashi snack dessert dagashi candies cake candy drinks daifuku (Bubble Milk Tea, Total 8.4oz)')).querySelector('td.col.item > div > dl > dd').textContent.trim() } catch (e) { return '' }})()", + "required_contents": { + "exact_match": "Bubble Milk Tea" + } + } + ] + }, + "intent_template_id": 49, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__SHOPPING__/royal-family-2-packs-of-mini-mochi-individually-wrapped-japenese-taiwanese-asia-sweet-rice-wagashi-snack-dessert-dagashi-candies-cake-candy-drinks-daifuku-bubble-milk-tea-total-8-4oz.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 169, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Can you find me {{category}} that combines the {{attribute}} of the items in the following images?", + "intent": "Can you find me a product that combines the functions of the items in the following images?", + "image": [ + "https://images.pexels.com/photos/9582658/pexels-photo-9582658.jpeg", + 
"https://images.pexels.com/photos/3205735/pexels-photo-3205735.jpeg" + ], + "instantiation_dict": { + "category": "a product", + "attribute": "functions" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "lambda:(() => { try { return document.querySelector('#maincontent > div.page-title-wrapper.product > h1 > span').textContent } catch (e) { return '' }})()", + "required_contents": { + "must_include": [ + "Clock", + "Camera" + ] + } + } + ] + }, + "intent_template_id": 50, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/hidden-clock-camera-wifi-hd-1080p-spy-camera-with-night-vision-motion-detection-loop-recording-covert-nanny-cam-for-home-office-surveillance-no-audio.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 170, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Can you find me {{category}} that combines the {{attributes}} of the items in the following images?", + "intent": "Can you find me a product that combines the functions of the items in the following images?", + "image": [ + "https://images.pexels.com/photos/4206091/pexels-photo-4206091.jpeg", + "https://images.pexels.com/photos/1772123/pexels-photo-1772123.jpeg" + ], + "instantiation_dict": { + "category": "a product", + "attributes": "functions" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "lambda:(() => { try { return document.querySelector('#maincontent > div.page-title-wrapper.product > h1 > span').textContent } catch (e) { return '' }})()", + "required_contents": { + "must_include": [ + "Projection Keyboard" + ] + } + } + ] + }, + 
"intent_template_id": 51, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/la-guapa-virtual-projection-keyboard-laser-projection-bluetooth-wireless-keyboard-for-smart-phone-pc-tablet-laptop-wireless-laser-projection-keyboard-silver.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 171, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Can you find me {{category}} that combines the {{attributes}} of the items in the following images?", + "intent": "Can you find me a taiwanese snack that combines the flavors of the items in the following images?", + "image": [ + "https://images.pexels.com/photos/947879/pexels-photo-947879.jpeg", + "https://images.pexels.com/photos/4110006/pexels-photo-4110006.jpeg" + ], + "instantiation_dict": { + "category": "a taiwanese snack", + "attributes": "flavors" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "lambda:(() => { try { return document.querySelector('#maincontent > div.page-title-wrapper.product > h1 > span').textContent } catch (e) { return '' }})()", + "required_contents": { + "must_include": [ + "SUNNY HILLS Pineapple Cake" + ] + } + } + ] + }, + "intent_template_id": 51, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/sunny-hills-pineapple-cake-10pcs-500g-best-taiwanese-gift-sunny-hills-fresh-stock-taiwan-food-pineapple-cake.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 172, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I really like {{stuff}}. 
Can you find me {{product}} and add it to my shopping cart?", + "intent": "I really like this animal's pattern. Can you find me a queen size bedding set that has the same pattern and add it to my shopping cart?", + "image": [ + "https://images.pexels.com/photos/39857/leopard-leopard-spots-animal-wild-39857.jpeg" + ], + "instantiation_dict": { + "stuff": "this animal's pattern", + "product": "a queen size bedding set that has the same pattern" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart/", + "locator": "lambda:(() => { try { return Array.from(document.querySelectorAll('.item-info')).find(el => el.textContent.includes('Girls Leopard Print Bedding Set Teens Luxury Pink Black Cheetah Print Bed Sheet Set Twin Size for Kids Children Women Bedroom Decor Fitted Sheet Chic Wild Animal Bed Cover with 1 Pillow Case')).querySelector('td.col.item > div > dl > dd:nth-child(4)').textContent.trim() } catch (e) { return '' }})()", + "required_contents": { + "exact_match": "Queen" + } + } + ] + }, + "intent_template_id": 49, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__SHOPPING__/girls-leopard-print-bedding-set-teens-luxury-pink-black-cheetah-print-bed-sheet-set-twin-size-for-kids-children-women-bedroom-decor-fitted-sheet-chic-wild-animal-bed-cover-with-1-pillow-case.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 173, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I really like {{stuff}}. Can you find me {{product}} and add it to my shopping cart?", + "intent": "I really like this fruit. 
Can you find me a 1 fl oz fragrance that has the same smell and add it to my shopping cart?", + "image": [ + "https://images.pexels.com/photos/209549/pexels-photo-209549.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1" + ], + "instantiation_dict": { + "stuff": "this fruit", + "product": "a 1 fl oz fragrance that has the same smell" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart/", + "locator": "lambda:(() => { try { return Array.from(document.querySelectorAll('.item-info')).find(el => el.textContent.includes('Jo Malone Grapefruit Cologne Spray for Women, 1 Ounce')).querySelector('td.col.item > div > dl > dd').textContent.trim() } catch (e) { return '' }})()", + "required_contents": { + "exact_match": "1 Fl Oz (Pack of 1)" + } + } + ] + }, + "intent_template_id": 49, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__SHOPPING__/jo-malone-grapefruit-cologne-spray-for-women-1-ounce.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 174, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Can you find me {{category}} that combines the {{attributes}} of the items in the following images and buy it for me?", + "intent": "Can you find me a product that combines the functions of the items in the following images and buy it for me?", + "image": [ + "https://images.pexels.com/photos/2529147/pexels-photo-2529147.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "https://images.pexels.com/photos/244553/pexels-photo-244553.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1" + ], + "instantiation_dict": { + "category": "a product", + "attributes": "functions" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + 
"reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector('.order-details-items.ordered').outerText", + "required_contents": { + "must_include": [ + "roller skate" + ] + } + } + ] + }, + "intent_template_id": 52, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/sxzhsm-roller-skates-for-women-outdoor-parkour-shoes-with-wheels-for-girls-boys-kick-rollers-shoes-retractable-adults-kids-quad-roller-skates-men-unisex-skating-shoes-recreation-sneakers-eu39-white.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 175, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} that is of the same brand as the product in this picture and buy {{quantity}} for me. It needs to {{description}}.", + "intent": "Find me a wireless white game controller that is of the same brand as the product in this picture and buy it for me. 
It needs to have at 10 reviews and be priced between $110-$120.", + "image": "https://images.pexels.com/photos/8998292/pexels-photo-8998292.jpeg", + "instantiation_dict": { + "category": "a wireless white game controller", + "description": "have at 10 reviews and be priced between $110-$120", + "quantity": "it" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector('.order-details-items.ordered').outerText", + "required_contents": { + "must_include": [ + "B07P3L5GMW" + ] + } + } + ] + }, + "intent_template_id": 53, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/xbox-wireless-controller-phantom-white-special-edition.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 176, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} that is of the same brand as the product in this picture and buy {{quantity}} for me. It needs to {{description}}.", + "intent": "Find me a black headphone that is of the same brand as the product in this picture and buy it for me. 
It needs to be priced more than $1000.", + "image": "https://images.pexels.com/photos/3541365/pexels-photo-3541365.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "category": "a black headphone", + "description": "be priced more than $1000", + "quantity": "it" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector('.order-details-items.ordered').outerText", + "required_contents": { + "must_include": [ + "B01LD5GO7I" + ] + } + } + ] + }, + "intent_template_id": 53, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/sony-mdr-z1r-ww2-signature-hi-res-headphone-black-international-version.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 177, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} that is of the same brand as the product in this picture and buy {{quantity}} for me. It needs to {{description}}.", + "intent": "Find me a headphone that is of the same brand as the product in this picture and buy it for me. 
It needs to have color sliver and have at least 10 reviews with an average rating greater than 4.", + "image": "https://images.pexels.com/photos/4219883/pexels-photo-4219883.jpeg", + "instantiation_dict": { + "category": "a headphone", + "description": "have color sliver and have at least 10 reviews with an average rating greater than 4", + "quantity": "it" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector('.order-details-items.ordered').outerText", + "required_contents": { + "must_include": [ + "B07J9YNRWF |OR| B07CX6HHFK |OR| B00972M942" + ] + } + } + ] + }, + "intent_template_id": 53, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 178, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} that is of the same brand as the product in this picture and buy {{quantity}} for me. It needs to {{description}}.", + "intent": "Find me a multifunction printer (copy/scan/fax/print) that is of the same brand as the product in this picture and buy \ud83d\udd1f of them for me. 
It needs to have color black and have at least 5 reviews with an average rating greater than 3.", + "image": "https://images.pexels.com/photos/11129922/pexels-photo-11129922.jpeg", + "instantiation_dict": { + "category": "a multifunction printer (copy/scan/fax/print)", + "quantity": "\ud83d\udd1f of them", + "description": "have color black and have at least 5 reviews with an average rating greater than 3" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:shopping_get_order_product_quantity(__page__,'B00PZ9WV94')", + "required_contents": { + "required_values": [ + "== 10" + ] + } + } + ] + }, + "intent_template_id": 53, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/hp-cf484a-laserjet-pro-mfp-m225dn-multifunction-laser-printer-copy-fax-print-scan.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 179, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} that is of the same brand as the product in this picture and buy {{quantity}} for me. It needs to {{description}}.", + "intent": "Find me all of the protein bars that is of the same brand as the product in this picture and buy 12 bars of each (at the lowest possible per unit price) for me. 
It needs to have per unit price between $1.66 and $4.08.", + "image": "https://images.pexels.com/photos/12538285/pexels-photo-12538285.jpeg", + "instantiation_dict": { + "category": "all of the protein bars", + "quantity": "12 bars of each (at the lowest possible per unit price)", + "description": "have per unit price between $1.66 and $4.08" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:shopping_get_order_product_quantity(__page__,'B00MXUFL0E')", + "required_contents": { + "required_values": [ + "== 1" + ] + } + }, + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:shopping_get_order_product_option(__page__, 'B00MXUFL0E', 'Size')", + "required_contents": { + "must_include": [ + "12 Count (Pack of 1)" + ] + } + }, + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:shopping_get_order_product_quantity(__page__,'B07ZD2PB9F')", + "required_contents": { + "required_values": [ + "== 1" + ] + } + } + ] + }, + "intent_template_id": 53, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/kellogg-s-special-k-protein-bars-meal-replacement-protein-snacks-value-size-strawberry-19oz-box-12-bars.html, __SHOPPING__/kellogg-s-special-k-protein-bars-meal-replacement-protein-snacks-value-size-chocolatey-chip-cookie-dough-19oz-box-12-bars.html. Per unit price is calculated as listed price divided by 12." + }, + { + "sites": [ + "shopping" + ], + "task_id": 180, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} that is of the same brand as products in one of the following pictures and buy {{quantity}} for me. 
It needs to {{description}}.", + "intent": "Find me a digital camera with warranty that is of the same brand as products in one of the following pictures and buy one for me. It needs to be black and have at least 12 reviews. Furthermore, it cannot be too expensive - it needs to have price less than $700.", + "image": [ + "https://images.pexels.com/photos/6708408/pexels-photo-6708408.jpeg" + ], + "instantiation_dict": { + "category": "a digital camera with warranty", + "quantity": "one", + "description": "be black and have at least 12 reviews. Furthermore, it cannot be too expensive - it needs to have price less than $700" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:shopping_get_order_product_quantity(__page__,'B001P06Q66')", + "required_contents": { + "required_values": [ + "== 1" + ] + } + } + ] + }, + "intent_template_id": 54, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/olympus-sp-590uz-12mp-digital-camera-with-26x-wide-angle-optical-dual-image-stabilized-zoom-and-2-7-inch-lcd-black.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 181, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/wall-art/posters-prints.html", + "geolocation": null, + "intent_template": "Buy me something like {{item}}, but {{attribute}}, from this page.", + "intent": "Buy me something like the picture shown, but in sketching style, from this page.", + "image": "https://images.pexels.com/photos/3751675/pexels-photo-3751675.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "item": "the picture shown", + "attribute": "in sketching style" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + 
"reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:shopping_get_order_product_option(__page__, 'B091FT6ZJC', 'Color')", + "required_contents": { + "must_include": [ + "Cute Elephant" + ] + } + } + ] + }, + "intent_template_id": 55, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "__SHOPPING__/yelash-cute-elephant-canvas-wall-art-colorful-dots-and-sketch-elephant-picture-print-for-kids-room-bathroom-poster-framed-ready-to-hang-12-x16-x1-panel.html", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 182, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/cell-phones-accessories/cases-holsters-sleeves/basic-cases.html?p=2", + "geolocation": null, + "intent_template": "Buy me something like {{item}}, but {{attribute}}, from this page.", + "intent": "Buy me something like what is used to protect the phone in the picture, but one with a soldier figure, from this page.", + "image": "https://images.pexels.com/photos/374004/pexels-photo-374004.jpeg", + "instantiation_dict": { + "item": "what is used to protect the phone in the picture", + "attribute": "one with a soldier figure" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector('.order-details-items.ordered').outerText", + "required_contents": { + "must_include": [ + "B095GYK1VZ" + ] + } + } + ] + }, + "intent_template_id": 55, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": 
"__SHOPPING__/generic-phone-case-compatible-with-iphone-sebastian-scratch-stan-accessories-the-shock-winter-waterproof-soldier-6-7-8-plus-se-2020-x-xr-11-pro-max-12-mini-transparent.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 183, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{category}} that is of the same brand as products in one of the following pictures and buy {{quantity}} for me. It needs to {{description}}.", + "intent": "Find me a \ud83d\udcfa that is of the same brand as products in one of the following pictures and buy 3\ufe0f\u20e3 for me. It needs to have price between $4000 and $10000.", + "image": [ + "https://images.pexels.com/photos/2942361/pexels-photo-2942361.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "https://images.pexels.com/photos/16121799/pexels-photo-16121799/free-photo-of-close-up-of-cellphone.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1" + ], + "instantiation_dict": { + "category": "a \ud83d\udcfa", + "quantity": "3\ufe0f\u20e3", + "description": "have price between $4000 and $10000" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:shopping_get_order_product_quantity(__page__,'B08H5STXXC |OR| B07YGWT3KV |OR| B08X4XCJ4T |OR| B0959NDNJV |OR| B09GMQ72RH |OR| B098KLX2R8')", + "required_contents": { + "required_values": [ + "== 3" + ] + } + } + ] + }, + "intent_template_id": 54, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 184, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I really like {{stuff}}. 
Can you find me {{product}} and buy {{quantity}} for me?", + "intent": "I really like this drink. Can you find me mochi that have flavor of this drink and buy 4*2 of them for me?", + "image": [ + "https://images.pexels.com/photos/12187693/pexels-photo-12187693.jpeg" + ], + "instantiation_dict": { + "stuff": "this drink", + "product": "mochi that have flavor of this drink", + "quantity": "4*2 of them" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:shopping_get_order_product_quantity(__page__,'B08PHZ6G4J')", + "required_contents": { + "required_values": [ + "== 8" + ] + } + }, + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:shopping_get_order_product_option(__page__, 'B08PHZ6G4J', 'Flavor Name')", + "required_contents": { + "must_include": [ + "Bubble Milk Tea" + ] + } + } + ] + }, + "intent_template_id": 56, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/royal-family-2-packs-of-mini-mochi-individually-wrapped-japenese-taiwanese-asia-sweet-rice-wagashi-snack-dessert-dagashi-candies-cake-candy-drinks-daifuku-bubble-milk-tea-total-8-4oz.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 185, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I really like {{stuff}}. Can you find me {{product}} and buy {{quantity}} for me?", + "intent": "I really like this animal's pattern. 
Can you find me a twin xl size bedding set that have the same pattern and buy two for me?", + "image": [ + "https://images.pexels.com/photos/267074/pexels-photo-267074.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1" + ], + "instantiation_dict": { + "stuff": "this animal's pattern", + "product": "a twin xl size bedding set that have the same pattern", + "quantity": "two" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:shopping_get_order_product_quantity(__page__,'B08SQJF7PF')", + "required_contents": { + "required_values": [ + "== 2" + ] + } + }, + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:shopping_get_order_product_option(__page__, 'B08SQJF7PF', 'Size')", + "required_contents": { + "must_include": [ + "Twin XL" + ] + } + } + ] + }, + "intent_template_id": 56, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/girls-leopard-print-bedding-set-teens-luxury-pink-black-cheetah-print-bed-sheet-set-twin-size-for-kids-children-women-bedroom-decor-fitted-sheet-chic-wild-animal-bed-cover-with-1-pillow-case.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 186, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Can you find me {{category}} that is the combination of the following images and buy it for me?", + "intent": "Can you find me a sport equipment that is the combination of the following images and buy it for me?", + "image": [ + "https://images.pexels.com/photos/1537671/pexels-photo-1537671.jpeg", + "https://images.pexels.com/photos/2883462/pexels-photo-2883462.jpeg" + ], + "instantiation_dict": { + "category": "a sport equipment" + }, + "require_reset": false, + "eval": { + 
"eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector('.order-details-items.ordered').outerText", + "required_contents": { + "must_include": [ + "roller skate |OR| cycling shoes |OR| bike shoes |OR| bicycle shoes" + ] + } + } + ] + }, + "intent_template_id": 57, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/sxzhsm-roller-skates-for-women-outdoor-parkour-shoes-with-wheels-for-girls-boys-kick-rollers-shoes-retractable-adults-kids-quad-roller-skates-men-unisex-skating-shoes-recreation-sneakers-eu39-white.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 187, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add the least expensive {{item}} that can hold {{storable}} to my shopping cart.", + "instantiation_dict": { + "item": "opaque phone case", + "storable": "an iPhone X" + }, + "intent": "Add the least expensive opaque phone case that can hold an iPhone X to my shopping cart.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Asuwish Compatible with iPhone Xs X 10 10s Wallet Case Tempered Glass Screen Protector Leather Flip Cover Card Holder Phone Cases for iPhoneX iPhoneXs iPhone10 i PhoneX SX 10x 10xs X\u2019s Women Men Black" + ] + } + } + ] + }, + "intent_template_id": 58, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 188, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + 
"start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Add the least expensive {{item}} that can hold {{storable}} to my shopping cart.", + "instantiation_dict": { + "item": "car", + "storable": "4 people" + }, + "intent": "Add the least expensive car that can hold 4 people to my shopping cart.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no cars available on OneStopMarket.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 58, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 189, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you add {{items}} to the comparison page?", + "instantiation_dict": { + "items": "the two most expensive fully black VR headsets in the \"Virtual Reality\" category" + }, + "intent": "Can you add the two most expensive fully black VR headsets in the \"Virtual Reality\" category to the comparison page?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/catalog/product_compare", + "locator": "", + "required_contents": { + "must_include": [ + "ZJJY VR Headsets, Virtual Reality Headset, Bluetooth Controller for iPhone 11/Pro/X/Xs/Max/XR/8P/7P,for Samsung S20/S10/S9/S8/Plus/Note 10/9/8,Phones w/ 4.5-6.2in Screen, L025xq", + "ZNBJJWCP 3D VR Glasses Headset Virtual Reality Goggles Play Movies Photos Enjoyment for Smartphones" + ] + } + } + ] + }, + "intent_template_id": 33, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "Medium 
visual difficulty because the most expensive headset has some blue lighting effects on it." + }, + { + "sites": [ + "shopping" + ], + "task_id": 190, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you add {{items}} to the comparison page?", + "instantiation_dict": { + "items": "the two most expensive white projectors in the \"Video Projectors\" category" + }, + "intent": "Can you add the two most expensive white projectors in the \"Video Projectors\" category to the comparison page?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/catalog/product_compare", + "locator": "", + "required_contents": { + "must_include": [ + "Samsung Ultra Short Throw Laser Projector - 3840 x 2160 - Front - 20000 Hour Normal Mode4K UHD - 2,000,000:1-2800 lm (Renewed)", + "Panasonic LCD Projector - 720p - HDTV - 4:3 PT-EX800ZLU" + ] + } + } + ] + }, + "intent_template_id": 33, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 191, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "I've been thinking about ordering {{item}}, but {{condition}}. Can you add it to my shopping cart if it exists?", + "instantiation_dict": { + "item": "a donkey planter", + "condition": "only if it can hold 2 pots" + }, + "intent": "I've been thinking about ordering a donkey planter, but only if it can hold 2 pots. 
Can you add it to my shopping cart if it exists?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Design Toscano NG32766 Pancho the Burro Planter Sculpture,full color" + ] + } + } + ] + }, + "intent_template_id": 59, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 192, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/roasemal-back-scrubber-for-shower-silicone-back-scrubber-for-shower-dual-sided-natural-soft-and-stiff-massage-bristles-for-body-exfoliating-purple.html", + "geolocation": null, + "intent_template": "What is the width and length of this product in inches? (Not the package size)", + "instantiation_dict": {}, + "intent": "What is the width and length of this product in inches? (Not the package size)", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "26.77", + "4.33" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "26.77 in x 4.33 in" + }, + "intent_template_id": 60, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "Easier to perform OCR on the second image to get the dimensions." 
+ }, + { + "sites": [ + "shopping" + ], + "task_id": 193, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/cell-phones-accessories/accessories.html?p=10", + "geolocation": null, + "intent_template": "How much is the {{item}} with {{secondary_item}} in the picture on this page?", + "instantiation_dict": { + "item": "VR headset", + "secondary_item": "a dinosaur" + }, + "intent": "How much is the VR headset with a dinosaur in the picture on this page?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "$63.98" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$63.98" + }, + "intent_template_id": 61, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 194, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/patio-lawn-garden/gardening-lawn-care/pots-planters-container-accessories.html", + "geolocation": null, + "intent_template": "How much is the {{item}} with {{secondary_item}} in the picture on this page?", + "instantiation_dict": { + "item": "product", + "secondary_item": "the most plants" + }, + "intent": "How much is the product with the most plants in the picture on this page?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "$37.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$37.99" + }, + "intent_template_id": 61, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 195, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", 
+ "start_url": "__SHOPPING__/sports-outdoors/fan-shop/footwear.html?p=5", + "geolocation": null, + "intent_template": "How much is the {{item}} with {{secondary_item}} in the picture on this page?", + "instantiation_dict": { + "item": "shoe", + "secondary_item": "black pants" + }, + "intent": "How much is the shoe with black pants in the picture on this page?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "$26.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$26.99" + }, + "intent_template_id": 61, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 196, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/food-beverage-gifts/herb-spice-seasoning-gifts.html?product_list_mode=list", + "geolocation": null, + "intent_template": "Can you add the item {{offset}} the {{item}} to my cart?", + "instantiation_dict": { + "offset": "two rows below", + "item": "bagel seasoning" + }, + "intent": "Can you add the item two rows below the bagel seasoning to my cart?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Yemeni Hawayij" + ] + } + } + ] + }, + "intent_template_id": 62, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 197, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sports-outdoors/fan-shop/clothing.html?p=10", + "geolocation": null, + "intent_template": "Can 
you add the item {{offset}} the {{item}} to my cart?", + "instantiation_dict": { + "offset": "one row up and one column left of", + "item": "red and white polka dot romper" + }, + "intent": "Can you add the item one row up and one column left of the red and white polka dot romper to my cart?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Sweatshirt for Men Long Sleeve Turn Down Collar Spring Summer Printed Casual Slim Fit Button Down Shirts Tops T-Shirts" + ] + } + } + ] + }, + "intent_template_id": 62, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 198, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/office-products/office-electronics/printers-accessories.html?product_list_limit=36", + "geolocation": null, + "intent_template": "Can you add the item {{offset}} the {{item}} to my cart?", + "instantiation_dict": { + "offset": "in the same column but at the bottom of the page as", + "item": "HP M225DW LaserJet Pro MFP printer" + }, + "viewport_size": { + "width": 1280 + }, + "intent": "Can you add the item in the same column but at the bottom of the page as the HP M225DW LaserJet Pro MFP printer to my cart?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Flat Scanner Cable for HP LJ Pro M1212NF 1212 1213 1216 1217 1218 121X Printer" + ] + } + } + ] + }, + "intent_template_id": 62, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": 
"medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 199, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/home-decor-products/home-decor-accents.html", + "geolocation": null, + "intent_template": "How much does the {{item}} weigh (in {{units}})?", + "instantiation_dict": { + "item": "product with an animal on a cube", + "units": "pounds" + }, + "intent": "How much does the product with an animal on a cube weigh (in pounds)?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1.72 |OR| 1.5" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1.72 pounds" + }, + "intent_template_id": 63, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 200, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/dairy-cheese-eggs/cheese.html", + "geolocation": null, + "intent_template": "How much does the {{item}} weigh (in {{units}})?", + "instantiation_dict": { + "item": "cheese with the orange rind", + "units": "pounds" + }, + "intent": "How much does the cheese with the orange rind weigh (in pounds)?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1 pounds" + }, + "intent_template_id": 63, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 201, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/video-games/playstation-4/accessories.html?p=2", + "geolocation": null, + "intent_template": "How much does the {{item}} weigh (in {{units}})?", + "instantiation_dict": { + "item": "PS4 sticker with a skull", + "units": "ounces" + }, + "intent": "How much does the PS4 sticker with a skull weigh (in ounces)?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "3.84" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3.84 ounces" + }, + "intent_template_id": 63, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 202, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/patio-lawn-garden/patio-furniture-accessories.html", + "geolocation": null, + "intent_template": "What colors do the {{item}} come in?", + "instantiation_dict": { + "item": "patio set with the mountains in the background" + }, + "intent": "What colors do the patio set with the mountains in the background come in?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Aegean Blue", + "Black&khaki |OR| Black & Khaki", + "Blue", + "Red", + "Royal Blue" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Aegean Blue, Black & Khaki, Blue, Red, Royal Blue" + }, + "intent_template_id": 64, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 203, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sports-outdoors/exercise-fitness.html?p=6", + "geolocation": null, + "intent_template": "Can you 
return an image url from an item on this page showing {{action}} {{item}}?", + "instantiation_dict": { + "action": "how a phone might be stored", + "item": "with the red shorts" + }, + "intent": "Can you return an image url from an item on this page showing how a phone might be stored with the red shorts?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B09QQNVKTN.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09QQNVKTN.1.jpg" + }, + "intent_template_id": 65, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 204, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sports-outdoors/exercise-fitness.html?price=0-100&product_list_limit=36", + "geolocation": null, + "intent_template": "Can you return an image url from an item on this page showing {{action}} {{item}}?", + "instantiation_dict": { + "action": "how someone might touch their toes", + "item": "in the gray tank top" + }, + "intent": "Can you return an image url from an item on this page showing how someone might touch their toes in the gray tank top?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no image of the guy in the gray tank top touching his toes.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 65, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 205, + "require_login": false, + "storage_state": 
"./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/bedding/kids-bedding.html", + "geolocation": null, + "intent_template": "Can you return an image url from an item on this page showing {{action}} {{item}}?", + "instantiation_dict": { + "action": "how someone might cuddle their dog", + "item": "in the unicorn blanket" + }, + "intent": "Can you return an image url from an item on this page showing how someone might cuddle their dog in the unicorn blanket?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B09MB3WDF5.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09MB3WDF5.1.jpg" + }, + "intent_template_id": 65, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 206, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/bedding/kids-bedding.html?p=4", + "geolocation": null, + "intent_template": "Can you return an image url from an item on this page showing {{action}} {{item}}?", + "instantiation_dict": { + "action": "how", + "item": "the celebrity blanket might look when folded" + }, + "intent": "Can you return an image url from an item on this page showing how the celebrity blanket might look when folded?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B08L4ZV4GX.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B08L4ZV4GX.1.jpg" + }, + "intent_template_id": 65, + "reasoning_difficulty": "medium", + 
"visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 207, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/breakfast-foods/cereals.html?p=5", + "geolocation": null, + "intent_template": "What is the name of the monster on that {{color}} box in the bottom row?", + "instantiation_dict": { + "color": "blue" + }, + "intent": "What is the name of the monster on that blue box in the bottom row?", + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Boo Berry" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Boo Berry" + }, + "intent_template_id": 66, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 208, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/breakfast-foods/cereals.html?p=5", + "geolocation": null, + "intent_template": "What is the name of the monster on that {{color}} box in the bottom row?", + "instantiation_dict": { + "color": "brown" + }, + "viewport_size": { + "width": 1280 + }, + "intent": "What is the name of the monster on that brown box in the bottom row?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Count Chocula |OR| COUNT CHOCULA" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Count Chocula" + }, + "intent_template_id": 66, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + 
"task_id": 209, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/breakfast-foods/cereals.html?p=5", + "geolocation": null, + "intent_template": "What is the name of the monster on that {{color}} box in the bottom row?", + "instantiation_dict": { + "color": "green" + }, + "viewport_size": { + "width": 1280 + }, + "intent": "What is the name of the monster on that green box in the bottom row?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no green box.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 66, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 210, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/breakfast-foods/cereals.html?p=9", + "geolocation": null, + "intent_template": "How many {{attribute}} does the box with the {{object}} have?", + "instantiation_dict": { + "attribute": "vitamins and minerals", + "object": "unicorn" + }, + "intent": "How many vitamins and minerals does the box with the unicorn have?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "12" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "12 vitamins and minerals" + }, + "intent_template_id": 67, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 211, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/grocery-gourmet-food/breakfast-foods/cereals.html?p=11", + "geolocation": null, + "intent_template": "How many {{attribute}} does the box with the {{object}} have?", + "instantiation_dict": { + "attribute": "grams of whole grain (per serving)", + "object": "volleyball player" + }, + "intent": "How many grams of whole grain (per serving) does the box with the volleyball player have?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "22" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "22 grams" + }, + "intent_template_id": 67, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 212, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/patio-lawn-garden/gardening-lawn-care.html?p=4", + "geolocation": null, + "intent_template": "Can you add the {{item}} with {{number}} stars to my cart?", + "instantiation_dict": { + "item": "red flower seeds", + "number": "around 4" + }, + "intent": "Can you add the red flower seeds with around 4 stars to my cart?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Red Roselle Seeds (Hibiscus sabdariffa) 50+ Tropical Hibiscus Seeds Packed in FROZEN SEED CAPSULES for Growing Seeds Now or Saving Seeds for Years" + ] + } + } + ] + }, + "intent_template_id": 68, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 213, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + 
"start_url": "__SHOPPING__/patio-lawn-garden/patio-furniture-accessories.html?p=6", + "geolocation": null, + "intent_template": "Can you add the {{item}} with {{number}} stars to my cart?", + "instantiation_dict": { + "item": "patio set that has water in the background", + "number": "the most" + }, + "intent": "Can you add the patio set that has water in the background with the most stars to my cart?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Tuoze 6 Pieces Patio Furniture Sectional Outdoor All Weather PE Rattan Wicker Lawn Conversation Cushioned Garden Sofa Set with Glass Coffee Table (Beige)" + ] + } + } + ] + }, + "intent_template_id": 68, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 214, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/wall-art/posters-prints.html?p=23", + "geolocation": null, + "intent_template": "Can you add the {{item}} with {{number}} stars to my cart?", + "instantiation_dict": { + "item": "photo collage", + "number": "a little more than 3" + }, + "intent": "Can you add the photo collage with a little more than 3 stars to my cart?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Collage Photo Canvas Prints Custom Multi Personalized Picture with Your Kids Family Wedding Images Customized Wall Art Home Decor for Living Room Bedroom Digital Printed Gifts" + ] + } + } + ] + }, + "intent_template_id": 68, + "reasoning_difficulty": 
"easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 215, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/wall-art/posters-prints.html?p=23", + "geolocation": null, + "intent_template": "Can you add the {{item}} with {{number}} stars to my cart?", + "instantiation_dict": { + "item": "flower picture", + "number": "4.5" + }, + "intent": "Can you add the flower picture with 4.5 stars to my cart?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Flowers Home Decorations 3 Panels Purple Lavender Flower Artwork for Living Room Office Bathroom Wall Decor Posters and Prints Frame to Hang (Lavender-2, 12*16inch)" + ] + } + } + ] + }, + "intent_template_id": 68, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 216, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/foot-hand-nail-care/nail-art-polish.html?p=5", + "geolocation": null, + "intent_template": "Can you add {{item}} {{condition}} to my wishlist?", + "instantiation_dict": { + "item": "the one", + "condition": "that looks like a microwave" + }, + "intent": "Can you add the one that looks like a microwave to my wishlist?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "JJ CARE Sterilizer for 
Salon 15L Capacity, 2-in1 Sanitizer Box and Dry Heat Sterilizer Cabinet Tabletop with Stainless Tray, Sterilizer Cabinet Sanitizing Machine" + ] + } + } + ] + }, + "intent_template_id": 69, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 217, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/foot-hand-nail-care/nail-art-polish.html?p=5", + "geolocation": null, + "intent_template": "What colors do the {{item}} come in?", + "instantiation_dict": { + "item": "mini ring trays" + }, + "intent": "What colors do the mini ring trays come in?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "purple |OR| violet", + "yellow |OR| gold", + "black", + "pink" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Purple, yellow, black, and pink" + }, + "intent_template_id": 64, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 218, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/health-household/diet-sports-nutrition/nutrition-bars-drinks.html?p=3", + "geolocation": null, + "intent_template": "What colors do the {{item}} come in?", + "instantiation_dict": { + "item": "bars in the bottom left" + }, + "intent": "What colors do the bars in the bottom left come in?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "green", + "blue", + "pink", + "yellow |OR| gold" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Green, blue, pink, and yellow" + }, 
+ "intent_template_id": 64, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 219, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/men/uniforms-work-safety.html", + "geolocation": null, + "intent_template": "What {{query}} {{question}}?", + "instantiation_dict": { + "query": "are the two types of birds", + "question": "on the front of that colorful shirt" + }, + "intent": "What are the two types of birds on the front of that colorful shirt?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "toucan", + "parrot" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "A toucan and parrots" + }, + "intent_template_id": 70, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 220, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/tools-accessories/mirrors.html", + "geolocation": null, + "intent_template": "How much is the {{item}} with {{secondary_item}} in the picture on this page?", + "instantiation_dict": { + "item": "mirror", + "secondary_item": "the charging cable" + }, + "intent": "How much is the mirror with the charging cable in the picture on this page?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "$30.88" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$30.88" + }, + "intent_template_id": 61, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + 
"comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 221, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you add the most expensive {{category}} to my cart that is less than ${{dollars}} and {{color}}?", + "instantiation_dict": { + "category": "smartwatch (from the \"Smartwatches\" category)", + "dollars": "200", + "color": "not black" + }, + "intent": "Can you add the most expensive smartwatch (from the \"Smartwatches\" category) to my cart that is less than $200 and not black?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "SAMSUNG Galaxy Watch Active 2 (40mm, GPS, Bluetooth) Smart Watch with Advanced Health Monitoring, Fitness Tracking, and Long Lasting Battery - Rose Gold (US Version) (Renewed)" + ] + } + } + ] + }, + "intent_template_id": 71, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 222, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you add the most expensive {{category}} to my cart that is less than ${{dollars}} and {{color}}?", + "instantiation_dict": { + "category": "USB adapter for a Switch", + "dollars": "113", + "color": "looks like bricks" + }, + "intent": "Can you add the most expensive USB adapter for a Switch to my cart that is less than $113 and looks like bricks?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + 
"required_contents": { + "must_include": [ + "8Bitdo Wireless USB Adapter 1 for Switch, Switch OLED, Windows, Mac & Raspberry Pi, Compatible with Switch Pro, Switch Joy-con, PS5 PS4 Controller and More" + ] + } + } + ] + }, + "intent_template_id": 71, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 223, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you add all the {{items}} {{condition}} to my shopping cart?", + "intent": "Can you add all the amiibo figures that cost less than $50 to my shopping cart?", + "instantiation_dict": { + "items": "amiibo figures that cost", + "condition": "less than $50" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "amiibo SAMUS/E.M.M.I. 2-in-1 Pack (Nintendo Switch)", + "Nintendo amiibo - Ice Climbers - Super Smash Bros. Series", + "Nintendo Amiibo - Chrom - Super Smash Bros. Series - Switch", + "Nintendo Amiibo - Link: The Legend of Zelda: Link's Awakening Series - Switch" + ] + } + } + ] + }, + "intent_template_id": 72, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 224, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "{{information}}. Can you add {{items}} to my cart?", + "intent": "I did this activity last weekend. 
Can you add the amiibo most likely to do this to my cart?", + "instantiation_dict": { + "information": "I did this activity last weekend", + "items": "the amiibo most likely to do this" + }, + "image": "https://images.pexels.com/photos/12985963/pexels-photo-12985963.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Nintendo amiibo - Ice Climbers - Super Smash Bros. Series" + ] + } + } + ] + }, + "intent_template_id": 73, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 225, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/tools-accessories/mirrors.html?p=3", + "geolocation": null, + "intent_template": "Can you add the product that looks most like {{emojis}} to my cart?", + "instantiation_dict": { + "emojis": "\ud83e\udd0d\ud83e\udd0d\ud83e\udd0d" + }, + "intent": "Can you add the product that looks most like \ud83e\udd0d\ud83e\udd0d\ud83e\udd0d to my cart?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "FRCOLOR 3pcs Hand Mirror Heart Handheld Mirror Cosmetic Mirror with Handle for Women Travel Hairdresser Salon" + ] + } + } + ] + }, + "intent_template_id": 74, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 226, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + 
"start_url": "__SHOPPING__/cell-phones-accessories/cases-holsters-sleeves/basic-cases.html?p=5", + "geolocation": null, + "intent_template": "Can you add the product that looks most like {{emojis}} to my cart?", + "instantiation_dict": { + "emojis": "\ud83d\udc04" + }, + "intent": "Can you add the product that looks most like \ud83d\udc04 to my cart?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "KANGHAR iPhone 12 Tire Cow,iPhone 12 Pro Tire Case Black White Slim Anti-Scratch Shockproof Skid Outline Durable PC Layer TPU Bumper Anti-Dropping Full Body Protection Cover -6.1 Inch" + ] + } + } + ] + }, + "intent_template_id": 74, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 227, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/wall-art.html?p=6", + "geolocation": null, + "intent_template": "Can you add the product that looks most like {{emojis}} to my cart?", + "instantiation_dict": { + "emojis": "\ud83d\udc80" + }, + "intent": "Can you add the product that looks most like \ud83d\udc80 to my cart?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Canvas Prints All is Vanity Wood Framed Canvas Wall Art Home Office Decor Ready to Hang 16\"x20\"" + ] + } + } + ] + }, + "intent_template_id": 74, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 228, + 
"require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/wall-art.html?p=14", + "geolocation": null, + "intent_template": "Can you add the product that looks most like {{emojis}} to my cart?", + "instantiation_dict": { + "emojis": "a single \ud83c\udf3b" + }, + "intent": "Can you add the product that looks most like a single \ud83c\udf3b to my cart?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Bathroom Decor Artwork Black and White Animal Resting Elephant Look Sunflower Field Framed Wall Art Giclee Wall Decor on Canvas Stretched Artwork Living Room Bedroom Ready to Hang,11x14in" + ] + } + } + ] + }, + "intent_template_id": 74, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 229, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/wall-art/posters-prints.html?p=24", + "geolocation": null, + "intent_template": "Can you add the product that looks most like {{emojis}} to my cart?", + "instantiation_dict": { + "emojis": "this pattern: \ud83c\udded\ud83c\uddfa" + }, + "intent": "Can you add the product that looks most like this pattern: \ud83c\udded\ud83c\uddfa to my cart?", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "When Sick Kids Should Stay Home from School Poster - Laminated, 12x18 in. 
- Pre-K, Home Daycare Supplies - Elementary School Nurse Office Decor - Pediatric Health" + ] + } + } + ] + }, + "intent_template_id": 74, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 230, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sports-outdoors/fan-shop/clothing.html?p=2", + "geolocation": null, + "intent_template": "Can you add the {{item}} that look like they are from {{reference}} to my wishlist?", + "instantiation_dict": { + "item": "pants", + "reference": "the Twister game" + }, + "intent": "Can you add the pants that look like they are from the Twister game to my wishlist?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "DZQUY Men's Classic Plaid Pants Chinos Stretch Slim Fit Lattice Trousers Casual Running Jogger Business Pencil Dress Pants" + ] + } + } + ] + }, + "intent_template_id": 75, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 231, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sports-outdoors/fan-shop/clothing.html?p=6", + "geolocation": null, + "intent_template": "Can you add the {{item}} that look like they are from {{reference}} to my wishlist?", + "instantiation_dict": { + "item": "boxers", + "reference": "space" + }, + "intent": "Can you add the boxers that look like they are from space to my wishlist?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": 
null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Men's Boxer Briefs Soft Cotton Boxers Cozy Trunks Breathable Underpants Sexy Low Rise Underwear Stretch Ice Silk Panties" + ] + } + } + ] + }, + "intent_template_id": 75, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 232, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/playstation-4/accessories.html?p=9", + "geolocation": null, + "intent_template": "Can you add the {{item}} that look like they are from {{reference}} to my wishlist?", + "instantiation_dict": { + "item": "decals", + "reference": "an anime" + }, + "intent": "Can you add the decals that look like they are from an anime to my wishlist?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Vanknight Vinyl Decal Skin Stickers Cover for PS4 Console Playstation 2 Controllers" + ] + } + } + ] + }, + "intent_template_id": 75, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 233, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/xbox-one/accessories.html?p=4", + "geolocation": null, + "intent_template": "Between {{item1}} and {{item2}}, add the cheaper one to my cart and the other to my wishlist.", + "instantiation_dict": { + "item1": "the red and 
black controller", + "item2": "the controller with green accents" + }, + "intent": "Between the red and black controller and the controller with green accents, add the cheaper one to my cart and the other to my wishlist.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Microsoft Xbox One Wireless Controller - Volcano Shadow (Renewed)" + ] + } + }, + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Wired Controller for Xbox One, High Performance USB Gamepad Remote Joystick Controller with Dual Vibration and Audio Jack for Xbox One/X/S and PC Windows 7/8/10 (Black)" + ] + } + } + ] + }, + "intent_template_id": 76, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 234, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/kitchen-dining/kitchen-table-linens.html", + "geolocation": null, + "intent_template": "Between {{item1}} and {{item2}}, add the cheaper one to my cart and the other to my wishlist.", + "instantiation_dict": { + "item1": "the second item in the top row", + "item2": "the one below it" + }, + "intent": "Between the second item in the top row and the one below it, add the cheaper one to my cart and the other to my wishlist.", + "viewport_size": { + "width": 1280 + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + 
"required_contents": { + "must_include": [ + "ARTSHOWING Anchor Table Runner and Placemats Set of 6, Burlap Linen Table Runners 13x90inch, Heat-Insulating Placemats for Table Decor, Love The Life You Live" + ] + } + }, + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "13x90'' Table Runner with Placemats Set of 6 Live Every Moment Laugh Every Day Love Beyond Words Honeycomb Yellow Background Non-Slip Washable Table Mats Table Runner Set for Dining Table Home Decor" + ] + } + } + ] + }, + "intent_template_id": 76, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 235, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/kitchen-dining/kitchen-table-linens.html?p=4", + "geolocation": null, + "intent_template": "Between {{item1}} and {{item2}}, add the cheaper one to my cart and the other to my wishlist.", + "instantiation_dict": { + "item1": "the Japanese style tablecover", + "item2": "the 3 leaf clover one" + }, + "intent": "Between the Japanese style tablecover and the 3 leaf clover one, add the cheaper one to my cart and the other to my wishlist.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "GGACEN Decorative Elastic Edged Square Fitted Tablecloth,Miyajima Torii Sumo and Flag Polyester Indoor Outdoor Fitted Tablecover for Banquet Buffet Kitchen Dining and Party Fit Square Table up to 30\"" + ] + } + }, + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "St. 
Patrick's Day Table Runner 36 Inches Long Farmhouse Dress Scarves, Green Clover Black and White Buffalo Plaid Dining Tablerunner for Coffee Table/Kitchen Island/Party D\u00e9cor, Small 13\" x 36\"" + ] + } + } + ] + }, + "intent_template_id": 76, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 236, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/customer/account/", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "Can you set the {{side}} side address to {{address}}? My phone number is also {{number}}.", + "instantiation_dict": { + "side": "right", + "address": "5000 Forbes Ave, Pittsburgh, PA 15213", + "number": "(412) 268-2000" + }, + "intent": "Can you set the right side address to 5000 Forbes Ave, Pittsburgh, PA 15213? My phone number is also (412) 268-2000.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/account/", + "locator": "lambda:(() => { try { return document.querySelector('#maincontent > div.columns > div.column.main > div.block.block-dashboard-addresses > div.block-content > div.box.box-shipping-address > div.box-content > address').textContent } catch (e) { return '' }})()", + "required_contents": { + "must_include": [ + "5000 Forbes Ave", + "Pittsburgh", + "Pennsylvania", + "15213", + "(412) 268-2000 |OR| 4122682000 |OR| 412-268-2000" + ] + } + } + ] + }, + "intent_template_id": 77, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 237, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/customer/account/", + "geolocation": null, + 
"viewport_size": { + "width": 1280 + }, + "intent_template": "Can you set the {{side}} side address to {{address}}? My phone number is also {{number}}.", + "instantiation_dict": { + "side": "left", + "address": "201 N Goodwin Ave in Urbana Illinois 61801", + "number": "2173333426" + }, + "intent": "Can you set the left side address to 201 N Goodwin Ave in Urbana Illinois 61801? My phone number is also 2173333426.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/account/", + "locator": "lambda:(() => { try { return document.querySelector('#maincontent > div.columns > div.column.main > div.block.block-dashboard-addresses > div.block-content > div.box.box-billing-address > div.box-content > address').textContent } catch (e) { return '' }})()", + "required_contents": { + "must_include": [ + "201 N Goodwin Ave", + "Urbana", + "Illinois", + "61801", + "(217) 333-3426 |OR| 2173333426 |OR| 217-333-3426" + ] + } + } + ] + }, + "intent_template_id": 77, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 238, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/cell-phones-accessories/cases-holsters-sleeves/flip-cases.html?p=3", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "From {{location}}, can you add the two items with the best ratings to my cart, and the others to my wishlist?", + "instantiation_dict": { + "location": "the third column" + }, + "intent": "From the third column, can you add the two items with the best ratings to my cart, and the others to my wishlist?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "SafeSleeve EMF Protection Anti Radiation iPhone Case: iPhone 12 and iPhone 12 Pro RFID EMF Blocking Wallet Cell Phone Case (Black)" + ] + } + }, + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "ONETOP Compatible with iPhone 12 Compatible with iPhone 12 Pro Wallet Case with Card Holder, PU Leather Kickstand Card Slots Case, Double Magnetic Clasp Durable Shockproof Cover 6.1 Inch(Blue)", + "LUPA iPhone X Case with Card Holder -Slim & Lightweight iPhone X Wallet Case - for Women & Men - Faux Leather - iPhone Xs Cases with Magnetic Closure\u2013 Light Brown (5.8 Inch Diagonal Screen Size)" + ] + } + } + ] + }, + "intent_template_id": 78, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 239, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/cell-phones-accessories/cases-holsters-sleeves/basic-cases.html?p=12", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "From {{location}}, can you add the two items with the best ratings to my cart, and the others to my wishlist?", + "instantiation_dict": { + "location": "the top row" + }, + "intent": "From the top row, can you add the two items with the best ratings to my cart, and the others to my wishlist?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Samsung Galaxy A71 5G Case Flash Ultrathin Silicone Soft Case, Flower, Intimate 
Butterfly Flash Stand, Samsung A71 5G Mobile Case (not Applicable to A71 4G) (Rose Gold)", + "Square iPhone 11 Case,Tzomsze Cute Full Camera Lens Protection & Electroplate Reinforced Corners Shockproof Edge Bumper Case Compatible with iPhone 11 [6.1 inches] -Candy Pink" + ] + } + }, + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Case for iPhone SE 2020 iPhone 7 8 for Boys Teen, Itsfaxinke Soft Sturdy Slim Anti Scratch Durable Shell Nonslip Flexible iPhone SE2 2nd Generation Phone Cover (RBshark)", + "GVIEWIN Designed for iPhone 13 Case 6.1 Inch 2021, Clear Flower Soft & Flexible TPU Shockproof Women Girls Phone Cover Floral Pattern Design Bumper Protective Case\uff08Magnolia/White\uff09" + ] + } + } + ] + }, + "intent_template_id": 78, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 240, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/legacy-systems/playstation-systems.html?p=2&product_list_limit=15&product_list_mode=list", + "geolocation": null, + "intent_template": "Can you add {{item}} {{condition}} to my wishlist?", + "instantiation_dict": { + "item": "the items", + "condition": "corresponding to the prime numbered positions on this page" + }, + "intent": "Can you add the items corresponding to the prime numbered positions on this page to my wishlist?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Orange Chrome Mirror Vinyl Decal Faceplate Mod Skin Kit for Sony PlayStation Portable 3000 Console by System Skins", + "PS3 Controller Wireless 
Bluetooth PlayStation 3 Remote with Charger Cable (White+Blue)", + "UUShop Protective Vinyl Skin Sticker Decal Warp for Sony Playstation VR Skull", + "Extreme Sim Racing Wheel Stand Cockpit SXT V2 Racing Simulator - Racing Wheel Stand Black Edition For Logitech G25, G27, G29, G920, Thrustmaster And Fanatec - Heavy Dutty and Foldable", + "PS4 Slim Skins - Decals for PS4 Controller Playstation 4 Slim - Stickers Cover for PS4 Slim Controller Sony Playstation Four Slim Accessories with Dualshock 4 Two Controllers Skin - Deadpool", + "Game Controller Stand Holder Wall Mount - 2 Pack for Gaming&Audio Headsets Holder Hook, Controller Headset Stand Holder for PS5/PS4/Xbox Gamer Gifts,No Screws, Easy to Install - Black" + ] + } + } + ] + }, + "intent_template_id": 69, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 241, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/deli-prepared-foods/deli-meats-cheeses.html", + "geolocation": null, + "intent_template": "Can you return an image link showing the {{item}} in another angle?", + "instantiation_dict": { + "item": "Serrano Ham Bone" + }, + "intent": "Can you return an image link showing the Serrano Ham Bone in another angle?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B074DKNVPL.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B074DKNVPL.1.jpg" + }, + "intent_template_id": 79, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 242, + "require_login": false, + "storage_state": 
"./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/heating-cooling-air-quality.html?p=4", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "Can you return an image link of the {{item}} in another angle?", + "instantiation_dict": { + "item": "fireplace in the bottom left" + }, + "intent": "Can you return an image link of the fireplace in the bottom left in another angle?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B07H3K871G.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07H3K871G.1.jpg" + }, + "intent_template_id": 80, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 243, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sports-outdoors/fan-shop/clothing.html?p=8", + "geolocation": null, + "intent_template": "Can you return an image link of the {{item}} in another angle?", + "instantiation_dict": { + "item": "black and white mens shirt" + }, + "intent": "Can you return an image link of the black and white mens shirt in another angle?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B09JSYYC12.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09JSYYC12.1.jpg" + }, + "intent_template_id": 80, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 244, + "require_login": 
true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/health-household/health-care.html?p=4", + "geolocation": null, + "intent_template": "{{information}}. Can you add {{items}} to my cart?", + "intent": "I am going to this place soon. Can you add the most practical item for my trip on this page to my cart?", + "instantiation_dict": { + "information": "I am going to this place soon", + "items": "the most practical item for my trip on this page" + }, + "image": "https://images.pexels.com/photos/975771/pexels-photo-975771.jpeg", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "OFF! Deep Woods Mosquito and Insect Repellent Wipes, Long lasting, 12 Individually Wrapped Wipes (1)" + ] + } + } + ] + }, + "intent_template_id": 73, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 245, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/health-household/health-care.html?p=7", + "geolocation": null, + "intent_template": "{{information}}. Can you add {{items}} to my cart?", + "intent": "I am going to this place soon. 
Can you add the most practical footwear for my trip on this page to my cart?", + "instantiation_dict": { + "information": "I am going to this place soon", + "items": "the most practical footwear for my trip on this page" + }, + "image": "https://images.pexels.com/photos/5721397/pexels-photo-5721397.jpeg", + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Spenco Men's Tribal Elite Sandal" + ] + } + } + ] + }, + "intent_template_id": 73, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 246, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/keychron-k10-full-size-104-keys-bluetooth-wireless-usb-wired-mechanical-gaming-keyboard-for-mac-with-gateron-brown-switch-multitasking-white-led-backlight-computer-keyboard-for-windows-laptop.html", + "geolocation": null, + "intent_template": "This would make a great birthday gift for my friend! Can you order another one on this site {{modification}} to {{address}}? My phone number is also {{number}} in case it is needed.", + "instantiation_dict": { + "modification": "from this brand that most fits his black/white themed setup", + "address": "353 Jane Stanford Way in Stanford, Cali 94305", + "number": "(650) 555-0111" + }, + "intent": "This would make a great birthday gift for my friend! Can you order another one on this site from this brand that most fits his black/white themed setup to 353 Jane Stanford Way in Stanford, Cali 94305? 
My phone number is also (650) 555-0111 in case it is needed.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B08CNBF28Z |OR| B08CN9ZV2N" + ] + } + }, + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \"#maincontent > div.columns > div.column.main > div.block.block-order-details-view > div.block-content > div.box.box-order-shipping-address > div > address\")", + "required_contents": { + "must_include": [ + "353 Jane Stanford Way", + "Stanford", + "California", + "94305", + "(650) 555-0111 |OR| 6505550111 |OR| 650-555-0111" + ] + } + } + ] + }, + "intent_template_id": 81, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/keychron-c2-full-size-wired-mechanical-keyboard-for-mac-hot-swappable-gateron-g-pro-blue-switch-white-backlight-104-keys-abs-keycaps-gaming-keyboard-for-windows-type-c-braid-cable.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 247, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/shop-succulents-assorted-collection-variety-set-of-hand-selected-fully-rooted-live-indoor-succulent-plants-6-pack.html", + "geolocation": null, + "intent_template": "This would make a great birthday gift for my friend! Can you order another one on this site {{modification}} to {{address}}? 
My phone number is also {{number}} in case it is needed.", + "instantiation_dict": { + "modification": "of a single plant that looks most like the middle right plant in the first picture", + "address": "3235 Voigt Dr, La Jolla, CA 92093", + "number": "(858) 555-0172" + }, + "intent": "This would make a great birthday gift for my friend! Can you order another one on this site of a single plant that looks most like the middle right plant in the first picture to 3235 Voigt Dr, La Jolla, CA 92093? My phone number is also (858) 555-0172 in case it is needed.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B09QXD1DVY |OR| B07C988M51" + ] + } + }, + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \"#maincontent > div.columns > div.column.main > div.block.block-order-details-view > div.block-content > div.box.box-order-shipping-address > div > address\")", + "required_contents": { + "must_include": [ + "3235 Voigt", + "La Jolla", + "California", + "92093", + "(858) 555-0172 |OR| 8585550172 |OR| 858-555-0172" + ] + } + } + ] + }, + "intent_template_id": 81, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 248, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/snacks-sweets/chocolate.html?p=4", + "geolocation": null, + "intent_template": "Order {{product}}. 
If the shipping is more than 7% of the total price, leave a 3 star review mentioning it, otherwise 5.", + "instantiation_dict": { + "product": "a 6 pack of the green chocolate bars" + }, + "intent": "Order a 6 pack of the green chocolate bars. If the shipping is more than 7% of the total price, leave a 3 star review mentioning it, otherwise 5.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B09PQ6G5WL", + "6 Pack" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating(\"B09PQ6G5WL\")", + "required_contents": { + "must_include": [ + "60" + ] + } + } + ] + }, + "intent_template_id": 82, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 249, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/patio-lawn-garden/patio-furniture-accessories.html", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "Order {{product}}. If the shipping is more than 7% of the total price, leave a 3 star review mentioning it, otherwise 5.", + "instantiation_dict": { + "product": "the second thing in the top row" + }, + "intent": "Order the second thing in the top row. 
If the shipping is more than 7% of the total price, leave a 3 star review mentioning it, otherwise 5.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B07SYHF5R2" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating(\"B07SYHF5R2\")", + "required_contents": { + "must_include": [ + "100" + ] + } + } + ] + }, + "intent_template_id": 82, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 250, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/electronics/headphones/earbud-headphones.html?p=6", + "geolocation": null, + "intent_template": "I have {{subjects}}, can you order enough {{product}} so each can get their own?", + "instantiation_dict": { + "subjects": "twin sons", + "product": "soccer ball earbuds" + }, + "intent": "I have twin sons, can you order enough soccer ball earbuds so each can get their own?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B09G6MGFPZ" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_order_product_quantity(__page__, \"B09G6MGFPZ\")", + "required_contents": { + "required_values": [ + "== 2" + ] + } + } + ] + }, + "intent_template_id": 83, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, 
+ { + "sites": [ + "shopping" + ], + "task_id": 251, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/alcoholic-beverages/wine.html", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "I have {{subjects}}, can you order enough {{product}} so each can get their own?", + "instantiation_dict": { + "subjects": "a dozen wedding guests", + "product": "of the (non-alcoholic) white wine in the first row" + }, + "intent": "I have a dozen wedding guests, can you order enough of the (non-alcoholic) white wine in the first row so each can get their own?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B005J0KH2C" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_order_product_quantity(__page__, \"B005J0KH2C\")", + "required_contents": { + "required_values": [ + "== 12" + ] + } + } + ] + }, + "intent_template_id": 83, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 252, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/legacy-systems/nintendo-systems.html", + "geolocation": null, + "intent_template": "I have {{subjects}}, can you order enough {{product}} so each can get their own?", + "instantiation_dict": { + "subjects": "octuplets on the way", + "product": "white wii remotes" + }, + "intent": "I have octuplets on the way, can you order enough white wii remotes so each can get their own?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + 
"reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B091YR9XRH" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_order_product_quantity(__page__, \"B091YR9XRH\")", + "required_contents": { + "required_values": [ + "== 4" + ] + } + } + ] + }, + "intent_template_id": 83, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 253, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Can you order {{product}} and let me know the final price?", + "instantiation_dict": { + "product": "a wii remote with an M logo" + }, + "intent": "Can you order a wii remote with an M logo and let me know the final price?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "program_html" + ], + "reference_answers": { + "must_include": [ + "94.99" + ] + }, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B07BZRFNL8" + ] + } + } + ], + "string_note": "", + "reference_answer_raw_annotation": "$94.99" + }, + "intent_template_id": 84, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 254, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/office-products/office-school-supplies/desk-accessories-workspace-organizers.html", + "geolocation": null, + "intent_template": "Can you order {{product}} and let me 
know the final price?", + "instantiation_dict": { + "product": "the product below the lady in the red shirt" + }, + "intent": "Can you order the product below the lady in the red shirt and let me know the final price?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "program_html" + ], + "reference_answers": { + "must_include": [ + "200" + ] + }, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B073BL7NH7" + ] + } + } + ], + "string_note": "", + "reference_answer_raw_annotation": "$200.00" + }, + "intent_template_id": 84, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 255, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/office-products/office-furniture-lighting/chairs-sofas.html", + "geolocation": null, + "intent_template": "Can you order {{product}} and let me know the final price?", + "instantiation_dict": { + "product": "two of the chair with the spine graphic in black" + }, + "intent": "Can you order two of the chair with the spine graphic in black and let me know the final price?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "program_html" + ], + "reference_answers": { + "must_include": [ + "689.98" + ] + }, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B07WS7YJLJ", + "Black" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_order_product_quantity(__page__, \"B07WS7YJLJ\")", + "required_contents": { + "required_values": [ + "== 2" + ] + } + } + ], + 
"string_note": "", + "reference_answer_raw_annotation": "$689.98" + }, + "intent_template_id": 84, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 256, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/aroy-d-longan-in-syrup-20oz-565g-2-pack.html", + "geolocation": null, + "intent_template": "Can you add {{items}} to the comparison page?", + "instantiation_dict": { + "items": "this and the other canned fruit (of the same brand) that looks like this, but red instead of brown" + }, + "intent": "Can you add this and the other canned fruit (of the same brand) that looks like this, but red instead of brown to the comparison page?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/catalog/product_compare", + "locator": "", + "required_contents": { + "must_include": [ + "Aroy-D Longan in Syrup 20oz (565g), 2 pack", + "Aroy-d Rambutan in Syrup (Pack of 6)" + ] + } + } + ] + }, + "intent_template_id": 33, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 257, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Can you add {{item}} {{condition}} to my wishlist?", + "intent": "Can you add a light fixture (from the \"Wall Lights\" section) that looks like \ud83c\udf39 to my wishlist?", + "instantiation_dict": { + "item": "a light fixture (from the \"Wall Lights\" section)", + "condition": "that looks like \ud83c\udf39" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + 
"program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Accent Plus 10018784 Romantic Roses Wall Sconce, White" + ] + } + } + ] + }, + "intent_template_id": 69, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 258, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/novelty-more/clothing.html?p=13", + "geolocation": null, + "intent_template": "Can you order the item on this page that is cheaper between {{item1}} and {{item2}} and just leave the other one in my cart? My size is {{size}}.", + "instantiation_dict": { + "item1": "the anime shirt", + "item2": "the orange text top", + "size": "large" + }, + "intent": "Can you order the item on this page that is cheaper between the anime shirt and the orange text top and just leave the other one in my cart? 
My size is large.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B07FDS25SN", + "Large" + ] + } + }, + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Mom Come Pick Me Up Hoodie, Sweatshirt With Word On Back Trendy, Funny Positive Word On Trendy Hoodie, Tumblr Hoodie, Quotes Shirts For Men" + ] + } + } + ] + }, + "intent_template_id": 85, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 259, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/novelty-more/clothing.html?p=23&product_list_limit=36", + "geolocation": null, + "intent_template": "Can you order the item on this page that is cheaper between {{item1}} and {{item2}} and just leave the other one in my cart? My size is {{size}}.", + "instantiation_dict": { + "item1": "the shirt with a bloody hand (in any color)", + "item2": "the red dress", + "size": "XXL" + }, + "intent": "Can you order the item on this page that is cheaper between the shirt with a bloody hand (in any color) and the red dress and just leave the other one in my cart? 
My size is XXL.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B09LLW8MWL" + ] + } + }, + { + "url": "__SHOPPING__/checkout/cart", + "locator": "func:get_query_text(__page__, \"#shopping-cart-table\")", + "required_contents": { + "must_include": [ + "Floral Lace Up Gothic Punk Dress for Womens Steampunk Vintage Lolita Spaghetti Strap High Low Dresses", + "Red", + "XX-Large" + ] + } + } + ] + }, + "intent_template_id": 85, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 260, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/electronics/computers-accessories/computer-accessories-peripherals.html?product_list_order=name&product_list_limit=36", + "geolocation": null, + "intent_template": "Can you order the {{item}} {{condition}}?", + "instantiation_dict": { + "item": "charger protectors", + "condition": "if it includes a bubble tea in the pack" + }, + "intent": "Can you order the charger protectors if it includes a bubble tea in the pack?", + "require_reset": false, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B0953QS3ZX" + ] + } + } + ] + }, + "intent_template_id": 86, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 
261, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/electronics/computers-accessories/computer-accessories-peripherals.html?product_list_order=name&product_list_limit=36", + "geolocation": null, + "intent_template": "Can you order the {{item}} {{condition}}?", + "image": "https://images.pexels.com/photos/1613621/pexels-photo-1613621.jpeg", + "instantiation_dict": { + "item": "charger protectors", + "condition": "if it includes this thing I found on the beach in the pack" + }, + "intent": "Can you order the charger protectors if it includes this thing I found on the beach in the pack?", + "require_reset": false, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B09PCY8QQJ" + ] + } + } + ] + }, + "intent_template_id": 86, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 262, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/makeup/face.html?p=6&product_list_limit=24&product_list_order=name", + "geolocation": null, + "intent_template": "Can you leave a {{number}} star review on {{product}} saying \"{{review}}\"?", + "instantiation_dict": { + "number": "5", + "product": "the palette with a flower on it", + "review": "My daughter absolutely loves it!! Would recommend to anyone" + }, + "intent": "Can you leave a 5 star review on the palette with a flower on it saying \"My daughter absolutely loves it!! 
Would recommend to anyone\"?", + "require_reset": false, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__", + "locator": "func:shopping_get_sku_latest_review_rating(\"B09QYJJNW2\")", + "required_contents": { + "must_include": [ + "100" + ] + } + }, + { + "url": "__SHOPPING__", + "locator": "func:shopping_get_sku_latest_review_text(\"B09QYJJNW2\")", + "required_contents": { + "must_include": [ + "My daughter absolutely loves it!! Would recommend to anyone" + ] + } + } + ] + }, + "intent_template_id": 87, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/beauty-personal-care/makeup/face/eyeshadow/eyeshadow-palettes/colourpop-garden-variety-eyeshadow-palette.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 263, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Can you leave a {{number}} star review on {{product}} saying \"{{review}}\"?", + "instantiation_dict": { + "number": "2", + "product": "the most expensive CoComelon blanket set (from the \"Kids' Bedding\" category)", + "review": "I was expecting more for the price, started to fall apart after a few days" + }, + "intent": "Can you leave a 2 star review on the most expensive CoComelon blanket set (from the \"Kids' Bedding\" category) saying \"I was expecting more for the price, started to fall apart after a few days\"?", + "require_reset": false, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__", + "locator": "func:shopping_get_sku_latest_review_rating(\"B097YHDSVG\")", + "required_contents": { + "must_include": [ + "40" + ] + } + }, + { + "url": "__SHOPPING__", + "locator": 
"func:shopping_get_sku_latest_review_text(\"B097YHDSVG\")", + "required_contents": { + "must_include": [ + "I was expecting more for the price, started to fall apart after a few days" + ] + } + } + ] + }, + "intent_template_id": 87, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/jerfyut-cartoon-bedding-sets-twin-duvet-cover-3-piece-cute-bed-set-for-boys-girls-kid-with-1-duvet-cover-2-pillowcase-bed-sheets.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 264, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?q=bonsai&product_list_order=name", + "geolocation": null, + "intent_template": "Can you leave a {{number}} star review on {{product}} saying \"{{review}}\"?", + "instantiation_dict": { + "number": "4", + "product": "the most expensive plant that looks like the hands of a clock at 6:40", + "review": "I love this plant! It's so unique and I get so many compliments on it! The only downside is that it's a little hard to take care of." + }, + "intent": "Can you leave a 4 star review on the most expensive plant that looks like the hands of a clock at 6:40 saying \"I love this plant! It's so unique and I get so many compliments on it! The only downside is that it's a little hard to take care of.\"?", + "require_reset": false, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__", + "locator": "func:shopping_get_sku_latest_review_rating(\"B095NHLW6F\")", + "required_contents": { + "must_include": [ + "80" + ] + } + }, + { + "url": "__SHOPPING__", + "locator": "func:shopping_get_sku_latest_review_text(\"B095NHLW6F\")", + "required_contents": { + "must_include": [ + "I love this plant! It's so unique and I get so many compliments on it! 
The only downside is that it's a little hard to take care of." + ] + } + } + ] + }, + "intent_template_id": 87, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 265, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Can you order the cheapest thing {{item}}?", + "image": "https://images.pexels.com/photos/14038782/pexels-photo-14038782.jpeg", + "instantiation_dict": { + "item": "like the thing on her shoulder (in the same color)" + }, + "intent": "Can you order the cheapest thing like the thing on her shoulder (in the same color)?", + "require_reset": false, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B000FHD7PI" + ] + } + } + ] + }, + "intent_template_id": 88, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/sony-zsh10cp-portable-heavy-duty-cd-radio-boombox-speaker-system-discontinued-by-manufacturer.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 266, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Can you order the cheapest thing {{item}}?", + "image": "https://images.pexels.com/photos/9137660/pexels-photo-9137660.jpeg", + "instantiation_dict": { + "item": "from the \"GPS System Accessories\" category to protect the display touch screen on my car (which is in the picture)" + }, + "intent": "Can you order the cheapest thing from the \"GPS System Accessories\" category to protect 
the display touch screen on my car (which is in the picture)?", + "require_reset": false, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B07SFY2VNB" + ] + } + } + ] + }, + "intent_template_id": 88, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/lfotpp-fit-for-2019-2020-2021-silverado-1500-infotainment-3-8-inch-car-navigation-screen-protector-tempered-glass-9h-hardness-car-infotainment-display-center-touchscreen-protective-film-scratch-resistant.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 267, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/pc/accessories.html?p=4&product_list_order=price", + "geolocation": null, + "intent_template": "Can you add {{item}} to my cart and take me to the final checkout screen before placing the order?", + "instantiation_dict": { + "item": "the RGB headset" + }, + "intent": "Can you add the RGB headset to my cart and take me to the final checkout screen before placing the order?", + "require_reset": true, + "reference_url": "", + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/checkout/#payment", + "program_html": [ + { + "url": "__SHOPPING__/checkout/#payment", + "locator": "func:get_query_text(__page__, \"#opc-sidebar > div.opc-block-summary > div > div.content.minicart-items > div > ol\")", + "required_contents": { + "must_include": [ + "RAFIKI Gaming Headset with Microphone for PS4, PS5, Laptop, PC,Mobile Phone,7.1 Surround Sound Headphones,Memory Foam Ear Pads LED Lights, Self-Adjusting Over Ear Headset" + ] + } + } + ] 
+ }, + "intent_template_id": 89, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 268, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/skin-care/maternity.html?p=6&product_list_limit=36&product_list_order=name", + "geolocation": null, + "intent_template": "Can you add {{item}} to my cart and take me to the final checkout screen before placing the order?", + "instantiation_dict": { + "item": "the thing that says 777 in any color" + }, + "intent": "Can you add the thing that says 777 in any color to my cart and take me to the final checkout screen before placing the order?", + "require_reset": true, + "reference_url": "", + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/checkout/#payment", + "program_html": [ + { + "url": "__SHOPPING__/checkout/#payment", + "locator": "func:get_query_text(__page__, \"#opc-sidebar > div.opc-block-summary > div > div.content.minicart-items > div > ol\")", + "required_contents": { + "must_include": [ + "YYDS Women Sterling silver Digital Pendant Necklace Number Pendant Necklace Stainless Steel Friendship Necklace" + ] + } + } + ] + }, + "intent_template_id": 89, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 269, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Can you add {{item}} to my cart and take me to the final checkout screen before placing the order?", + "instantiation_dict": { + "item": "the cheapest live lobster that doesn't have a lemon in its picture" + }, + "intent": "Can you add the cheapest live lobster that doesn't have a lemon in its picture to my cart and take 
me to the final checkout screen before placing the order?", + "require_reset": true, + "reference_url": "", + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/checkout/#payment", + "program_html": [ + { + "url": "__SHOPPING__/checkout/#payment", + "locator": "func:get_query_text(__page__, \"#opc-sidebar > div.opc-block-summary > div > div.content.minicart-items > div > ol\")", + "required_contents": { + "must_include": [ + "Live Wild Caught Maine Lobster Approx. (1.5 lb. ea., 6 lobsters about 10lb) Express Shipped Chilled" + ] + } + } + ] + }, + "intent_template_id": 89, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 270, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/tools-home-improvement/lighting-ceiling-fans/wall-lights.html", + "geolocation": null, + "intent_template": "Can you add {{item}} to my cart and take me to the final checkout screen before placing the order?", + "instantiation_dict": { + "item": "the lamp that looks like an upside down matcha whisk" + }, + "intent": "Can you add the lamp that looks like an upside down matcha whisk to my cart and take me to the final checkout screen before placing the order?", + "require_reset": true, + "reference_url": "", + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/checkout/#payment", + "program_html": [ + { + "url": "__SHOPPING__/checkout/#payment", + "locator": "func:get_query_text(__page__, \"#opc-sidebar > div.opc-block-summary > div > div.content.minicart-items > div > ol\")", + "required_contents": { + "must_include": [ + "Baoblaze Retro Bamboo Wall Lamp Sconce E27 LED Lighting Fixtures Weave Creative Novelty Night Lights for Restaurant Home Aisle Tea Room Decoration" + ] + } + } + ] + }, + 
"intent_template_id": 89, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 271, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/furniture/bedroom-furniture.html?p=5&product_list_limit=36&product_list_order=name", + "geolocation": null, + "intent_template": "{{modifier}} {{item}} to my cart and order it only if the total comes out to less than {{amount}}?", + "instantiation_dict": { + "modifier": "Can you add", + "item": "the bed with a slide in white in the triple bunk + 2 drawers configuration", + "amount": "$1000" + }, + "intent": "Can you add the bed with a slide in white in the triple bunk + 2 drawers configuration to my cart and order it only if the total comes out to less than $1000?", + "require_reset": true, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B09QSHGPLH", + "White", + "Triple Bunk Bed with 2 Drawers" + ] + } + } + ] + }, + "intent_template_id": 90, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 272, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "{{modifier}} {{item}} to my cart and order it only if the total comes out to less than {{amount}}?", + "instantiation_dict": { + "modifier": "Can you search for \"Cheerios\", and add", + "item": "the family sized blue Cheerios cereal", + "amount": "$43" + }, + "intent": "Can you search for \"Cheerios\", and add the family sized blue Cheerios cereal to 
my cart and order it only if the total comes out to less than $43?", + "require_reset": true, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Frosted Cheerios Breakfast Cereal with Oats, Gluten Free, 18.4 oz (Pack of 7)" + ] + } + } + ] + }, + "intent_template_id": 90, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 273, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen/heating-cooling-air-quality.html?product_list_order=price#", + "geolocation": null, + "intent_template": "{{modifier}} {{item}} to my cart and order it only if the total comes out to less than {{amount}}?", + "instantiation_dict": { + "modifier": "Can you add", + "item": "the Santa humidifier", + "amount": "63 quarters" + }, + "intent": "Can you add the Santa humidifier to my cart and order it only if the total comes out to less than 63 quarters?", + "require_reset": true, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "4L Humidifiers for Bedroom - Top Fill Air Humidifier for Large Room, Auto Shut-off Cool Mist Humidifier, Essential-Oil Diffuser, LED Display, Constant Humidity, Mist Mode for Home, Office, Plants" + ] + } + } + ] + }, + "intent_template_id": 90, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 274, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/electronics/portable-audio-video/mp3-mp4-player-accessories.html?p=3&product_list_order=name", + "geolocation": null, + "intent_template": "Can you order the {{item}} {{condition}}?", + "instantiation_dict": { + "item": "item", + "condition": "that will make me look like \ud83d\udc31 in \ud83d\udfe6" + }, + "intent": "Can you order the item that will make me look like \ud83d\udc31 in \ud83d\udfe6?", + "require_reset": false, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B09PBQVWK1", + "Blue" + ] + } + } + ] + }, + "intent_template_id": 86, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 275, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you take me to the page that has this {{subject}} {{item}}?", + "instantiation_dict": { + "subject": "character", + "item": "on a phone case" + }, + "intent": "Can you take me to the page that has this character on a phone case?", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09MVYYCCV.0.jpg", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/anime-girls-phone-case-nezuko-phone-case-anime-phone-case-for-iphone-12-compatible-from-iphone-7-series-to-13-series-cases-comes-with-a-keychain-mdz-12.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 91, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "hard" + }, + { + 
"sites": [ + "shopping" + ], + "task_id": 276, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you take me to the page that has this {{subject}} {{item}}?", + "instantiation_dict": { + "subject": "character", + "item": "on a PS4 controller" + }, + "intent": "Can you take me to the page that has this character on a PS4 controller?", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B0777KDV5J.0.jpg", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/vanknight-playstation-4-dualshock-ps4-controller-skin-vinyl-decals-skins-stickers-2-pack-for-ps4-controller-skins-ps4-skins-deadpool.html |OR| __SHOPPING__/ps4-slim-skins-decals-for-ps4-controller-playstation-4-slim-stickers-cover-for-ps4-slim-controller-sony-playstation-four-slim-accessories-with-dualshock-4-two-controllers-skin-deadpool.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 91, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 277, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you take me to the page that has this {{subject}} {{item}}?", + "instantiation_dict": { + "subject": "animal", + "item": "on a blanket" + }, + "intent": "Can you take me to the page that has this animal on a blanket?", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B001U5URUC.0.jpg", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": 
"__SHOPPING__/duise-flannel-throw-blanket-cute-donkey-pattern-print-blanket-soft-and-comfortable-flannel-decorative-throw-blankets-for-couch-sofa-bed-throw-50-x60-flannel-fleece-blanket-for-bedroom-livingroom.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 91, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 278, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you take me to the page that has this {{subject}} {{item}}?", + "instantiation_dict": { + "subject": "top left character", + "item": "as a flash drive" + }, + "intent": "Can you take me to the page that has this top left character as a flash drive?", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B01MXKU7JF.0.jpg", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/cute-lovely-cartoon-usb-2-0-flash-drive-8gb-minnie-mouse.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 91, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 279, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you take me to the page that has this {{subject}} {{item}}?", + "instantiation_dict": { + "subject": "video game character", + "item": "on slippers" + }, + "intent": "Can you take me to the page that has this video game character on slippers?", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09PPRDWBX.0.jpg", + "require_reset": false, + "eval": { + 
"eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "Such an item is not available on OneStopMarket." + }, + "intent_template_id": 91, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 280, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you take me to the page that has this {{subject}} {{item}}?", + "instantiation_dict": { + "subject": "cute character", + "item": "on a short sleeved t-shirt" + }, + "intent": "Can you take me to the page that has this cute character on a short sleeved t-shirt?", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09GCG7RGC.0.jpg", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "Such an item is not available on OneStopMarket." 
+ }, + "intent_template_id": 91, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 281, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/women/shoes.html?p=6", + "geolocation": null, + "intent_template": "Can you order the shoes on this page that I would most likely wear if I was in the image{{size}}?", + "instantiation_dict": { + "size": "" + }, + "image": "https://images.pexels.com/photos/358010/pexels-photo-358010.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "intent": "Can you order the shoes on this page that I would most likely wear if I was in the image?", + "require_reset": false, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B0041HYWWA" + ] + } + } + ] + }, + "intent_template_id": 92, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 282, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/women/shoes.html?p=10", + "geolocation": null, + "intent_template": "Can you order the shoes on this page that I would most likely wear if I was in the image{{size}}?", + "instantiation_dict": { + "size": "" + }, + "image": "https://images.pexels.com/photos/5859769/pexels-photo-5859769.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "intent": "Can you order the shoes on this page that I would most likely wear if I was in the image?", + "require_reset": false, + "reference_url": "", + "eval": { + "eval_types": [ + 
"program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B09CPNW61V" + ] + } + } + ] + }, + "intent_template_id": 92, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 283, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/women/shoes.html?p=12", + "geolocation": null, + "intent_template": "Can you order the shoes on this page that I would most likely wear if I was in the image{{size}}?", + "instantiation_dict": { + "size": " in a size 8" + }, + "image": "https://images.pexels.com/photos/1619719/pexels-photo-1619719.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "intent": "Can you order the shoes on this page that I would most likely wear if I was in the image in a size 8?", + "require_reset": false, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B00IGRMOAI", + "8" + ] + } + } + ] + }, + "intent_template_id": 92, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping", + "wikipedia" + ], + "task_id": 284, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Wagyu |AND| __SHOPPING__", + "geolocation": null, + "intent_template": "That {{item}} in the Wikipedia page looks delicious! 
Can you order {{descriptors}} for me in OneStopShop?", + "instantiation_dict": { + "item": "second picture", + "descriptors": "2 pounds of thin slices" + }, + "intent": "That second picture in the Wikipedia page looks delicious! Can you order 2 pounds of thin slices for me in OneStopShop?", + "require_reset": false, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B08ZS89DVV", + "Thin Slices", + "2lbs" + ] + } + } + ] + }, + "intent_template_id": 93, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 285, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I want to eat this food, but I am {{restriction}}. Can you order the closest thing to this from the \"{{category}}\" category that fits my dietary requirements?", + "instantiation_dict": { + "restriction": "vegan", + "category": "Meat Substitutes" + }, + "image": "https://images.pexels.com/photos/5602496/pexels-photo-5602496.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "intent": "I want to eat this food, but I am vegan. 
Can you order the closest thing to this from the \"Meat Substitutes\" category that fits my dietary requirements?", + "require_reset": false, + "reference_url": "", + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "func:get_query_text(__page__, \".order-details-items.ordered\")", + "required_contents": { + "must_include": [ + "B08KWPWC4Y" + ] + } + } + ] + }, + "intent_template_id": 94, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 286, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Can you add {{items}} to the comparison page?", + "intent": "Can you add a product like what the person is sitting on to the comparison page?", + "image": "https://images.pexels.com/photos/4050295/pexels-photo-4050295.jpeg", + "instantiation_dict": { + "items": "a product like what the person is sitting on" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "__SHOPPING__/catalog/product_compare/", + "eval_image_class": ".product-image-photo", + "eval_vqa": [ + { + "question": "Is this a sofa? 
(yes/no)", + "answer": "yes" + } + ] + } + ], + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 33, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 287, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=mattress", + "geolocation": null, + "intent_template": "What is the {{size_attribute}} of the {{product}} with {{attribute}} in {{unit}}?", + "intent": "What is the width of the futon with carrot pattern in centimeters?", + "instantiation_dict": { + "size_attribute": "width", + "product": "futon", + "attribute": "carrot pattern", + "unit": "centimeters" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "90" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "90cm." 
+ }, + "intent_template_id": 95, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 288, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=reed+diffuser", + "geolocation": null, + "intent_template": "Add something like {{item}}, but {{attribute}}, from this page to my wish list.", + "intent": "Add something like the picture shown, but with rose scent, from this page to my wish list.", + "image": "https://images.pexels.com/photos/6801190/pexels-photo-6801190.jpeg", + "instantiation_dict": { + "item": "the picture shown", + "attribute": "with rose scent" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Seed Spring Reed Diffuser Set with Sticks,Vanilla Scent Oil for Bedroom Office Gym and Stress Relief, Home Fragrance Products 50 ml / 1.7 oz |OR| Yankee Candle Reed Diffuser, Sun-Drenched Apricot Rose", + "Rose" + ] + } + } + ] + }, + "intent_template_id": 24, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/seed-spring-reed-diffuser-set-with-sticks-vanilla-scent-oil-for-bedroom-office-gym-and-stress-relief-home-fragrance-products-50-ml-1-7-oz.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 289, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add {{quantity}} of {{item}} from this page to my shopping cart.", + "intent": "Add 72 cans of the purple energy drink from this page to my shopping cart.", + 
"instantiation_dict": { + "quantity": "72 cans", + "item": "the purple energy drink" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart/", + "locator": "lambda:(() => {try {return Array.from(document.querySelectorAll('.item-info')).find(el => el.textContent.includes('V8 +Energy, Healthy Energy Drink, Steady Energy from Black and Green Tea, Pomegranate Blueberry, 8 Ounce Can ,Pack of 24')).querySelector('input').getAttribute('data-item-qty')} catch (e) {return ' '}})()", + "required_contents": { + "must_include": [ + "3" + ] + } + } + ] + }, + "intent_template_id": 96, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/v8-energy-healthy-energy-drink-steady-energy-from-black-and-green-tea-pomegranate-blueberry-8-ounce-can-pack-of-24.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 290, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?p=2&q=pillow", + "geolocation": null, + "intent_template": "What is the {{size_attribute}} of the {{product}} with {{attribute}} in {{unit}}?", + "intent": "What is the size of the white queen size pillow with grid pattern in inches?", + "instantiation_dict": { + "size_attribute": "size", + "product": "white queen size pillow", + "attribute": "grid pattern", + "unit": "inches" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "26", + "20" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "26*20" + }, + "intent_template_id": 95, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": 
"__SHOPPING__/wellboo-white-pillowcases-black-and-white-plaid-pillow-cases-large-checkered-pillow-shams-grid-cotton-queen-standard-women-men-teen-boys-girls-buffalo-check-pillowcase-decorative-envelope-closure.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 291, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?p=2&q=end+table", + "geolocation": null, + "intent_template": "What is the {{size_attribute}} of the {{product}} with {{attribute}} in {{unit}}?", + "intent": "What is the feet height of the end table with an alarm clock on it in inches?", + "instantiation_dict": { + "size_attribute": "feet height", + "product": "end table", + "attribute": "an alarm clock on it", + "unit": "inches" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "15.75" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "15.75\"" + }, + "intent_template_id": 95, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/lohorfire-silver-mirror-end-table-nightstand-bedside-end-table-with-storage-drawer-accent-table-for-living-room-bedroom-home-furniture-small-place.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 292, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=shirt", + "geolocation": null, + "intent_template": "What is the {{attribute}} of {{item}} that has the most similar pattern as {{property}} in the picture in this page?", + "intent": "What is the price of the shirt that has the most similar pattern as the man is wearing in the picture in this page?", + "image": "https://images.pexels.com/photos/6963032/pexels-photo-6963032.jpeg", + "instantiation_dict": { + "attribute": "price", + "item": 
"the shirt", + "property": "the man is wearing" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "3.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$3.99" + }, + "intent_template_id": 97, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/mens-casual-plaid-collar-shirt-beach-shirts-for-men-long-t-shirt-men-s-t-shirts-muscle-shirts-mens-graphic-tees.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 293, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=bag+black+white", + "geolocation": null, + "intent_template": "What is the {{attribute}} of {{item}} that has the most similar pattern as {{property}} in the picture in this page?", + "intent": "What is the weight in ounces of the bag that has the most similar pattern as the women is wearing in the picture in this page?", + "image": "https://images.pexels.com/photos/15543520/pexels-photo-15543520.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "attribute": "weight in ounces", + "item": "the bag", + "property": "the women is wearing" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "7.58" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "7.58 Ounces" + }, + "intent_template_id": 97, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/makeup-cosmetic-bag-black-white-houndstooth-classical-portable-travel-train-case-toiletry-bags-organizer-multifunction-storage.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 294, + "require_login": false, + 
"storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?p=5&q=shirt", + "geolocation": null, + "intent_template": "What is the {{attribute}} of {{item}} that has the most similar pattern as {{property}} in the picture in this page?", + "intent": "What is the price of the shirt that has the most similar pattern as the pillow in the picture in this page?", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B0969WGYKV.1.jpg", + "instantiation_dict": { + "attribute": "price", + "item": "the shirt", + "property": "the pillow" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$0.99" + }, + "intent_template_id": 97, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/mens-t-shirt-men-s-polo-shirts-short-sleeve-tee-basic-sports-t-shirts-floral-zipper-mens-summer-shirt-tops.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 295, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{item}} that {{attribute}} the picture.", + "intent": "Find me a rug that has the simliar pattern as the animal in the picture.", + "image": "https://images.pexels.com/photos/208965/pexels-photo-208965.jpeg", + "instantiation_dict": { + "item": "a rug", + "attribute": "has the simliar pattern as the animal in" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".fotorama__stage__frame", + "eval_vqa": [ + { + "question": "Is this a rug? 
(yes/no)", + "answer": "yes" + }, + { + "question": "Does this item have a leopard print pattern? (yes/no)", + "answer": "yes" + } + ] + } + ], + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 98, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/modern-area-rugs-brown-cheetah-leopard-5x8-rugs-for-living-room-5x7-clearance.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 296, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{item}} that {{attribute}} the picture.", + "intent": "Find me powder to make the beverage that is the same as the picture.", + "image": "https://images.pexels.com/photos/5947093/pexels-photo-5947093.jpeg", + "instantiation_dict": { + "item": "powder to make the beverage", + "attribute": "is the same as" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.page-title').textContent.toLocaleLowerCase()", + "required_contents": { + "must_include": [ + "matcha |OR| green tea" + ] + } + } + ], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 98, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/ryu-mei-japanese-organic-matcha-green-tea-powder-super-food-with-high-level-of-antioxidant-catechin-kyoto-g4-100-2-packs.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 297, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me {{item}} that {{attribute}} the picture.", + "intent": "Find me a hoodie that has the 
same brand as the one the man is wearing in the picture.", + "image": "https://images.pexels.com/photos/1989242/pexels-photo-1989242.jpeg", + "instantiation_dict": { + "item": "a hoodie", + "attribute": "has the same brand as the one the man is wearing in" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html", + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".fotorama__stage__frame", + "eval_vqa": [ + { + "question": "Is this a hoodie? (yes/no)", + "answer": "yes" + } + ] + } + ], + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.page-title').textContent", + "required_contents": { + "must_include": [ + "Champion" + ] + } + } + ], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 98, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/ryu-mei-japanese-organic-matcha-green-tea-powder-super-food-with-high-level-of-antioxidant-catechin-kyoto-g4-100-2-packs.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 298, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=champion+hoodie", + "geolocation": null, + "intent_template": "What size of {{item}} should I buy if {{condition}}?", + "intent": "What size of the dark green hoodie in this page should I buy if my chest size is 120 cm?", + "instantiation_dict": { + "item": "the dark green hoodie in this page", + "condition": "my chest size is 120 cm" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "XL |OR| X-Large" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "This task requires OCR.", + "reference_answer_raw_annotation": "XL" + }, + "intent_template_id": 99, + "reasoning_difficulty": "easy", 
+ "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/champion-men-s-sideline-quarter-zip-pullover-with-woven.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 299, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=under+armor", + "geolocation": null, + "intent_template": "What size of {{item}} should I buy if {{condition}}?", + "intent": "What size of the zip hoodie in this page should I buy if my waist size is 30 inches?", + "instantiation_dict": { + "item": "the zip hoodie in this page", + "condition": "my waist size is 30 inches" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "S |OR| Small" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "This task requires OCR.", + "reference_answer_raw_annotation": "S" + }, + "intent_template_id": 99, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/under-armor-men-s-sc30-ua-threadborne-full-zip-hoodie.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 300, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/baseball-hoodie-for-boys-teens-men-word-cloud-silhouette.html", + "geolocation": null, + "intent_template": "What size of {{item}} should I buy if {{condition}}?", + "intent": "What size of this item should I buy if my shirt width is 28 and 1/4 inches?", + "instantiation_dict": { + "item": "this item", + "condition": "my shirt width is 28 and 1/4 inches" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "XX-Large |OR| XXLarge |OR| 2X-L |OR| 2XL" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "This task requires OCR.", + 
"reference_answer_raw_annotation": "XX-Large" + }, + "intent_template_id": 99, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/baseball-hoodie-for-boys-teens-men-word-cloud-silhouette.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 301, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/jaybee-s-nuts-and-dried-fruits-sweet-dried-pineapple-rings-15-oz-tasty-tropical-dry-fruit-slices-on-the-go-snack-resealable-bag-kosher.html", + "geolocation": null, + "intent_template": "How many calories are in {{item}}?", + "intent": "How many calories are in this item per serving?", + "instantiation_dict": { + "item": "this item per serving" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "70" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "This task requires OCR.", + "reference_answer_raw_annotation": "70" + }, + "intent_template_id": 100, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/jaybee-s-nuts-and-dried-fruits-sweet-dried-pineapple-rings-15-oz-tasty-tropical-dry-fruit-slices-on-the-go-snack-resealable-bag-kosher.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 302, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/q-drinks-ginger-beer-25-4-ounce-12-bottles.html", + "geolocation": null, + "intent_template": "How many calories are in {{item}}?", + "intent": "How many calories are in this item per container?", + "instantiation_dict": { + "item": "this item per container" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "280" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "This task 
requires OCR.", + "reference_answer_raw_annotation": "280" + }, + "intent_template_id": 100, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/q-drinks-ginger-beer-25-4-ounce-12-bottles.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 303, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/tea-zone-2-lb-strawberry-powder.html", + "geolocation": null, + "intent_template": "How many calories are in {{item}}?", + "intent": "How many calories are in this item per container?", + "instantiation_dict": { + "item": "this item per container" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "4200" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4200" + }, + "intent_template_id": 100, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/tea-zone-2-lb-strawberry-powder.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 304, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/top-dry-garlic-shrimp-fries-25g-best-taiwanese-gift-top-dry-fresh-stock-taiwan-food-snack.html", + "geolocation": null, + "intent_template": "How many calories are in {{item}}?", + "intent": "How many calories are in this item per serving?", + "instantiation_dict": { + "item": "this item per serving" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "117.6 |OR| 117 |OR| 118" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "This task requires OCR and translation.", + "reference_answer_raw_annotation": "117.6" + }, + "intent_template_id": 100, + "reasoning_difficulty": "easy", + 
"visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/top-dry-garlic-shrimp-fries-25g-best-taiwanese-gift-top-dry-fresh-stock-taiwan-food-snack.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 305, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?p=6&q=pineapple", + "geolocation": null, + "intent_template": "Find me {{item}} that {{attribute}} the picture.", + "intent": "Find me the item in this page that has simliar packaging boxes to the ones in the picture.", + "image": "https://images.pexels.com/photos/7620774/pexels-photo-7620774.jpeg", + "instantiation_dict": { + "item": "the item in this page", + "attribute": "has simliar packaging boxes to the ones in" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/sunny-hills-pineapple-cake-10pcs-500g-best-taiwanese-gift-sunny-hills-fresh-stock-taiwan-food-pineapple-cake.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 98, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/sunny-hills-pineapple-cake-10pcs-500g-best-taiwanese-gift-sunny-hills-fresh-stock-taiwan-food-pineapple-cake.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 306, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?q=cookie", + "geolocation": null, + "intent_template": "Show me the product page of the {{item}}.", + "intent": "Show me the product page of the 10 oz net weight cookie.", + "instantiation_dict": { + "item": "10 oz net weight cookie" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/trader-joe-s-cookie-butter-sandwich-cookies.html", + 
"program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 101, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 307, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?p=6&q=ice+cream", + "geolocation": null, + "intent_template": "Show me the product page of the {{item}}.", + "intent": "Show me the product page of the 1.6 oz net weight ice cream.", + "instantiation_dict": { + "item": "1.6 oz net weight ice cream" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/joy-mini-cups-miniature-ice-cream-cones-for-kids-desserts-cupcake-cones-cake-pops-42-count-1-box-42-cones.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 101, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 308, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?q=pretzel", + "geolocation": null, + "intent_template": "Show me the product page of the {{item}}.", + "intent": "Show me the product page of the pretzels which says 1.8 pounds net weight on its packaging.", + "instantiation_dict": { + "item": "pretzels which says 1.8 pounds net weight on its packaging" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/lalees-blueberry-yogurt-pretzels-approx-1-3-pounds-yogurt-covered-pretzels-gourmet-pretzels.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 101, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": 
"medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 309, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "viewport_size": { + "width": 430, + "height": 932 + }, + "intent_template": "Please show me the product page for the item in the {{row}} row, {{column}} column.", + "intent": "Please show me the product page for the item in the third row, first column.", + "instantiation_dict": { + "row": "third", + "column": "first" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/so-delicious-dairy-free-cocowhip-light-vegan-non-gmo-project-verified-9-oz-tub.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 102, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 310, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/health-household.html", + "geolocation": null, + "viewport_size": { + "width": 844, + "height": 393 + }, + "intent_template": "Please show me the product page for the item in the {{row}} row, {{column}} column.", + "intent": "Please show me the product page for the item in the second row, first column.", + "instantiation_dict": { + "row": "second", + "column": "first" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/lr1130-ag10-battery-1-5v-long-lasting-alkaline-button-cell-batteries-20-batteries-5-year-warranty.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 102, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 311, + 
"require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "viewport_size": { + "width": 896, + "height": 414 + }, + "intent_template": "Tell me the price of the {{product}} in the {{location}}.", + "intent": "Tell me the price of the item in the fourth row, second column.", + "instantiation_dict": { + "product": "item", + "location": "fourth row, second column" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "string_note": "There is no fourth row.", + "reference_url": "", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 18, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 312, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "viewport_size": { + "width": 1280 + }, + "intent_template": "Please show me the product page for the item in the {{row}} row, {{column}} column.", + "intent": "Please show me the product page for the item in the second row, second column.", + "instantiation_dict": { + "row": "second", + "column": "second" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/q-mixers-premium-ginger-ale-real-ingredients-less-sweet-6-7-fl-oz-24-bottles.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 102, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 313, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen.html", + "geolocation": null, + "viewport_size": { + "width": 430, + "height": 
932 + }, + "intent_template": "Please add the item in the {{row}} row, {{column}} column to the shopping cart.", + "intent": "Please add the item in the third row, second column to the shopping cart.", + "instantiation_dict": { + "row": "third", + "column": "second" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Wood Storage Bench Homes Collection w/2 Drawers, Wicker Storage Bench Entryway Bench with Removable Basket & Cushion, Fully Assembled Shoe Bench for Hallway Entryway Living Room (Espresso)" + ] + } + } + ] + }, + "intent_template_id": 103, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 314, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=adapter", + "geolocation": null, + "intent_template": "Find me {{item}}.", + "intent": "Find me an adapter to connect these devices from the two images from this page.", + "image": [ + "https://images.pexels.com/photos/914915/pexels-photo-914915.jpeg", + "https://images.pexels.com/photos/2047905/pexels-photo-2047905.jpeg" + ], + "instantiation_dict": { + "item": "an adapter to connect these devices from the two images from this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/adapters-for-macbook-pro-usb-hdmi-adapter-multiport-usb-c-hub-macbook-pro-adapter-dongle-7-in-2-macbook-pro-usb-c-docking-station-dual-monitor-hdmi-adapter-with-dual-hdmi-2-usb-3-0-tf-sd-usb-c-100w.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 104, + "reasoning_difficulty": "easy", + "visual_difficulty": 
"medium", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 315, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=usb+c", + "geolocation": null, + "intent_template": "Find me {{item}}.", + "intent": "Find me an adapter to connect these devices from the two images from this page.", + "image": [ + "https://images.pexels.com/photos/1738642/pexels-photo-1738642.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "https://images.pexels.com/photos/434346/pexels-photo-434346.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1" + ], + "instantiation_dict": { + "item": "an adapter to connect these devices from the two images from this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/usb-c-hub-hdmi-adapter-for-macbook-pro-multi-ports-mac-dongle-with-4k-usb-c-to-hdmi-7-in-1-usb-c-hub-usb-c-to-hdmi-sd-tf-card-reader-3-usb-3-0-and-usb-c-power-pass-through-port-adapters.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 104, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 316, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=coke", + "geolocation": null, + "intent_template": "Tell me the price of the {{product}}.", + "intent": "Tell me the price of the 30 oz coke.", + "instantiation_dict": { + "product": "30 oz coke" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "string_note": "There is no 30oz coke.", + "reference_url": "", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 19, + "reasoning_difficulty": "easy", + 
"visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 317, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=beer", + "geolocation": null, + "intent_template": "Tell me the price of the {{product}}.", + "intent": "Tell me the price of the 18 oz beer.", + "instantiation_dict": { + "product": "18 oz beer" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "string_note": "There is no 18oz beer.", + "reference_url": "", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 19, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 318, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=USB+C+HUB%2C+7+in+1", + "geolocation": null, + "intent_template": "Please add to my shopping cart all the items from this page that can connect these devices from the two images.", + "intent": "Please add to my shopping cart all the items from this page that can connect these devices from the two images.", + "image": [ + "https://images.pexels.com/photos/2148216/pexels-photo-2148216.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "https://images.pexels.com/photos/415043/pexels-photo-415043.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1" + ], + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "USB C Hub Ethernet, 7 in 1 Type C Hub Multiport Adapter Desk Mount USB Docking Station, with 3 USB3.0, 
USB2.0, 1Gbps Ethern and SD/TF Card Reader, Clamp Design for iMac, Desktop, Computer, Table Edge", + "USB Docking Station Giq USB C hub USB 3.0 to Dual HDMI VGA Adapter Triple Display USB C Laptop Docking Station Dual Display Compatible with MacBook M1 USB Dock" + ] + } + } + ] + }, + "intent_template_id": 105, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping", + "wikipedia" + ], + "task_id": 319, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=type+c+to+3.5", + "geolocation": null, + "intent_template": "Please add to my shopping cart all the items from this page that can connect these devices from the two images.", + "intent": "Please add to my shopping cart all the items from this page that can connect these devices from the two images.", + "image": [ + "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Galaxy_Z_Flip.jpg.webp", + "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B06XWXR6M9.0.jpg" + ], + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "USB Type C to 3.5mm Headphone and Charger Adapter, 2-in-1 USB C to 3.5 mm Audio Jack PD Fast Charging Aux Cable Compatible with Samsung Galaxy S21 S20+ S20 Fe S10 Note 20 Ultra 10 Plus, Pixel 3 4 XL" + ] + } + } + ] + }, + "intent_template_id": 105, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 320, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/breads-bakery/cookies.html", + 
"geolocation": null, + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07CM3LVT2.0.jpg", + "intent_template": "Add {{quantity}} of {{item}} from this page to my shopping cart.", + "intent": "Add enough to have 48 bars of the item that most looks like the image from this page to my shopping cart.", + "instantiation_dict": { + "quantity": "enough to have 48 bars", + "item": "the item that most looks like the image" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart/", + "locator": "lambda:(() => {try {return Array.from(document.querySelectorAll('.item-info')).find(el => el.textContent.includes('Protein Wafer Peanut Butter - Box of 12 Bars, Peanut Butter, 12 Count')).querySelector('input').getAttribute('data-item-qty')} catch (e) {return ' '}})()", + "required_contents": { + "must_include": [ + "4" + ] + } + } + ] + }, + "intent_template_id": 96, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/protein-wafer-peanut-butter-box-of-12-bars-peanut-butter-12-count.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 321, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/breads-bakery/cookies.html", + "geolocation": null, + "image": "https://images.pexels.com/photos/6734161/pexels-photo-6734161.jpeg", + "intent_template": "Add {{quantity}} of {{item}} from this page to my shopping cart.", + "intent": "Add enough to have 400 of the cookies that most looks like the image from this page to my shopping cart.", + "instantiation_dict": { + "quantity": "enough to have 400", + "item": "the cookies that most looks like the image" + }, + "require_reset": true, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + 
"reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart/", + "locator": "lambda:(() => {try {return Array.from(document.querySelectorAll(\".item-info\")).find(el => el.textContent.includes(\"Annie's Organic 20 Piece Grabbits Chocolate Mint Sandwich Cookies, 8.06 Ounce (Pack of 10)\")).querySelector(\"input\").getAttribute(\"data-item-qty\")} catch (e) {return \" \"}})()", + "required_contents": { + "must_include": [ + "2" + ] + } + } + ] + }, + "intent_template_id": 96, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/annie-s-organic-20-piece-grabbits-chocolate-mint-sandwich-cookies-8-06-ounce-pack-of-10.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 322, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=vacuum", + "geolocation": null, + "image": "https://images.pexels.com/photos/6617538/pexels-photo-6617538.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "intent_template": "I want to buy this specific item, but only if it's brand new. Can you add one from this page to my cart?", + "intent": "I want to buy this specific item, but only if it's brand new. 
Can you add one from this page to my cart?", + "instantiation_dict": {}, + "require_reset": true, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The vaccum cleaner is not brand new", + "reference_answer_raw_annotation": "The vaccum cleaner is not brand new" + }, + "intent_template_id": 106, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 323, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?p=2&q=switch", + "geolocation": null, + "intent_template": "Tell me the price of the {{product}}.", + "intent": "Tell me the price of the purple drink on this page.", + "instantiation_dict": { + "product": "purple drink on this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "string_note": "There is no purple drink on the page.", + "reference_url": "", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 19, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 324, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?q=screen+protector", + "geolocation": null, + "image": "https://images.pexels.com/photos/12741170/pexels-photo-12741170.jpeg", + "intent_template": "Tell me the price of the {{product}}.", + "intent": "Tell me the price of the items on this page that are compatible with this phone brand.", + "instantiation_dict": { + "product": "items on this page that are compatible with this phone brand" + }, + "require_reset": false, + "eval": { + 
"eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "9.99", + "8.49" + ] + }, + "reference_url": "", + "program_html": [] + }, + "intent_template_id": 19, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "__SHOPPING__/fotbor-for-iphone-xr-iphone-11-screen-protector-privacy-tempered-glass-film-2-pack-iphone-11-privacy-screen-protector-iphone-xr-privacy-screen-protector-anti-spy-easy-install-case-friendly-6-1-inch.html and __SHOPPING__/mkeke-compatible-with-iphone-11-pro-max-screen-protector-iphone-xs-max-screen-protector-tempered-glass-3-pack-6-5.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 325, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/5082559/pexels-photo-5082559.jpeg", + "intent_template": "Purchase the cheapest {{item}}, in {{color}}.", + "intent": "Purchase the cheapest item that closely resembles the item on the right in the image, in blue.", + "instantiation_dict": { + "item": "item that closely resembles the item on the right in the image", + "color": "blue" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B09443ZWQD", + "Blue" + ] + } + } + ] + }, + "intent_template_id": 107, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/wireless-mouse-3-adjustable-dpi-innovative-stylish-portable-gaming-mouse-suitable-for-notebook-pc-computer-office-home-work.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 326, + "require_login": false, + "storage_state": 
"./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/18966448/pexels-photo-18966448.jpeg", + "intent_template": "Purchase the cheapest {{item}}, in {{color}}.", + "intent": "Purchase the cheapest headphones that has the same brand as the one in the image, in blue.", + "instantiation_dict": { + "item": "headphones that has the same brand as the one in the image", + "color": "blue" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B08KKW2ZS2", + "Mint" + ] + } + } + ] + }, + "intent_template_id": 107, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/logitech-g335-wired-gaming-headset-with-flip-to-mute-microphone-3-5mm-audio-jack-memory-foam-earpads-lightweight-compatible-with-pc-playstation-xbox-nintendo-switch-mint.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 327, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/8534088/pexels-photo-8534088.jpeg", + "intent_template": "Purchase the cheapest {{item}}, in {{color}}.", + "intent": "Purchase the cheapest item that has the same functionality as this, in black.", + "instantiation_dict": { + "item": "item that has the same functionality as this", + "color": "black" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": 
"document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B09RB14X4D", + "Black" + ] + } + } + ] + }, + "intent_template_id": 107, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/wireless-earbuds-mics-call-noise-canceling-wireless-charging-case-bluetooth-5-0-headset-wireless-earphones-mini-earbuds-stereo-headphones-white.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 328, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07X9SC7PW.0.jpg", + "intent_template": "Buy {{item}} while minimizing costs.", + "intent": "Buy eight batteries of the same type as shown in the image while minimizing costs.", + "instantiation_dict": { + "item": "eight batteries of the same type as shown in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B001B826IY", + "2.40" + ] + } + } + ] + }, + "intent_template_id": 108, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/gp-batteries-30310-battery-alkaline-aaa.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 329, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/1433191/pexels-photo-1433191.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "intent_template": "Buy {{item}} while minimizing costs.", + "intent": "Buy 
40 sheet of films as shown in the image while minimizing costs.", + "instantiation_dict": { + "item": "40 sheet of films as shown in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B073W8MFP9", + "25.98" + ] + } + } + ] + }, + "intent_template_id": 108, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/fujifilm-instax-mini-photo-camera-film-pack.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 330, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/2634435/pexels-photo-2634435.jpeg", + "intent_template": "Buy {{item}} {{condition}}.", + "intent": "Buy the cheapest camera of the exact same model as the image, in the same color.", + "instantiation_dict": { + "item": "the cheapest camera of the exact same model as the image,", + "condition": "in the same color" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07429BS32", + "Ice Blue" + ] + } + } + ] + }, + "intent_template_id": 109, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/fujifilm-instax-mini-9-instant-camera-cobalt-blue-with-2-x-instant-twin-film-pack-40-exposures.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 331, + 
"require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/3292366/pexels-photo-3292366.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "intent_template": "Purchase the exact item shown in the image.", + "intent": "Purchase the exact item shown in the image.", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "Sony", + "Alpha A7 II" + ] + } + } + ] + }, + "intent_template_id": 110, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "__SHOPPING__/sony-alpha-a7-ii-digital-camera-body-with-sonnar-t-fe-55mm-f-1-8-za-lens-64gb-card-battery-charger-case-tripod-kit.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 332, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/7789217/pexels-photo-7789217.jpeg", + "intent_template": "Purchase the exact item shown in the image.", + "intent": "Purchase the exact item shown in the image.", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "Canon", + "EOS R6" + ] + } + } + ] + }, + "intent_template_id": 110, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + 
"comments": "__SHOPPING__/canon-eos-r6-mirrorless-digital-camera-body-bundle-128gb-memory-case-tripod-18pc-bundle.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 333, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/2378138/pexels-photo-2378138.jpeg", + "intent_template": "Purchase the exact item shown in the image.", + "intent": "Purchase the exact item shown in the image.", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "string_note": "This item does not exist on OneStopMarket.", + "reference_url": "", + "program_html": [] + }, + "intent_template_id": 110, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 334, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/18943585/pexels-photo-18943585.jpeg", + "intent_template": "Purchase the exact item shown in the image.", + "intent": "Purchase the exact item shown in the image.", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "string_note": "This item does not exist on OneStopMarket.", + "reference_url": "", + "program_html": [] + }, + "intent_template_id": 110, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 335, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": 
"https://images.pexels.com/photos/6347514/pexels-photo-6347514.jpeg", + "intent_template": "Buy me {{item}} to {{address}}.", + "intent": "Buy me a chocolate ice cream to the address same as the label shown in the image. My phone number is also (650) 555-0111 in case it is needed.", + "instantiation_dict": { + "item": "a chocolate ice cream", + "address": "the address same as the label shown in the image. My phone number is also (650) 555-0111 in case it is needed" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "chocolate", + "ice cream" + ] + } + }, + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".box-order-shipping-address\").outerText", + "required_contents": { + "must_include": [ + "123 Bloome Street", + "Manhattan, New York, 12112" + ] + } + } + ] + }, + "intent_template_id": 111, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "This task requires OCR." + }, + { + "sites": [ + "shopping" + ], + "task_id": 336, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/6347909/pexels-photo-6347909.jpeg", + "intent_template": "Buy me {{item}} to {{address}}.", + "intent": "Buy me the cheapest color photo printer (from the \"Printers & Accessories\") category to Emily's place as shown in the image. My phone number is also (650) 555-0111 in case it is needed..", + "instantiation_dict": { + "item": "the cheapest color photo printer (from the \"Printers & Accessories\") category", + "address": "Emily's place as shown in the image. 
My phone number is also (650) 555-0111 in case it is needed." + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B005TI2Q6O" + ] + } + }, + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".box-order-shipping-address\").outerText", + "required_contents": { + "must_include": [ + "55 W89", + "Littletown", + "Manhattan, New York, 12345" + ] + } + } + ] + }, + "intent_template_id": 111, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "This task requires OCR." + }, + { + "sites": [ + "shopping" + ], + "task_id": 337, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/6348050/pexels-photo-6348050.jpeg", + "intent_template": "Buy me {{item}} to {{address}}.", + "intent": "Buy me the cheapest bike to Mars's place as shown in the image. My phone number is also (650) 555-0111 in case it is needed..", + "instantiation_dict": { + "item": "the cheapest bike", + "address": "Mars's place as shown in the image. My phone number is also (650) 555-0111 in case it is needed." + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "string_note": "OneStopMarket has no bikes.", + "reference_url": "", + "program_html": [] + }, + "intent_template_id": 111, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "OneStopMarket has no bikes." 
+ }, + { + "sites": [ + "shopping" + ], + "task_id": 338, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/129112/pexels-photo-129112.jpeg", + "intent_template": "Change my default shipping address to the place shown in the image.", + "intent": "Change my default shipping address to the place shown in the image.", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/address/", + "locator": "document.querySelector(\".box-address-shipping\").outerText", + "required_contents": { + "must_include": [ + "1600 Pennsylvania Ave", + "Washington, District of Columbia, 20500", + "United States" + ] + } + } + ] + }, + "intent_template_id": 112, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "It's an image of the White House." 
+ }, + { + "sites": [ + "shopping" + ], + "task_id": 339, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/19047616/pexels-photo-19047616.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "intent_template": "Change my default shipping address to the place shown in the image.", + "intent": "Change my default shipping address to the place shown in the image.", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/address/", + "locator": "document.querySelector(\".box-address-shipping\").outerText", + "required_contents": { + "must_include": [ + "1 Kinkakuji", + "Kyoto", + "Japan" + ] + } + } + ] + }, + "intent_template_id": 112, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "It's an image of the Kinkaku-ji. Actual address is sometimes in Japanese, so address eval only checks for the first part." 
+ }, + { + "sites": [ + "shopping" + ], + "task_id": 340, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/2071882/pexels-photo-2071882.jpeg", + "intent_template": "Change my default shipping address to the place shown in the image.", + "intent": "Change my default shipping address to the place shown in the image.", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "string_note": "This image does not have a shipping address.", + "reference_url": "", + "program_html": [] + }, + "intent_template_id": 112, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 341, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/11559638/pexels-photo-11559638.jpeg", + "intent_template": "Buy {{item}} {{condition}}.", + "intent": "Buy a CPU which is compatible with the motherboard in the image.", + "instantiation_dict": { + "item": "a CPU", + "condition": "which is compatible with the motherboard in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B08X6NXNX7", + "323.99" + ] + } + }, + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".item-options\").outerText", + "required_contents": { + "must_include": [ + "Processor 8 Cores" + ] + } + } + ] + }, + "intent_template_id": 109, + 
"reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/intelr-coretm-i7-11700kf-desktop-processor-8-cores-up-to-5-0-ghz-unlocked-lga1200-intelr-500-series-select-400-series-chipset-125w.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 342, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/18338406/pexels-photo-18338406.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "intent_template": "Buy {{item}} {{condition}}.", + "intent": "Buy a motherboard which has the same CPU socket as the motherboard in the image.", + "instantiation_dict": { + "item": "a motherboard", + "condition": "which has the same CPU socket as the motherboard in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B09J1S2LDS", + "294.99" + ] + } + } + ] + }, + "intent_template_id": 109, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/asus-tuf-gaming-z690-plus-wifi-d4-lga1700-intel-12th-gen-atx-gaming-motherboard-pcie-5-0-ddr4-4xm-2-nvme-ssd-14-2-power-stages-wifi-6-2-5gb-lan-front-usb-3-2-gen-2-type-c-thunderbolt-4-argb-headers.html" + }, + { + "sites": [ + "shopping" + ], + "task_id": 343, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/3665444/pexels-photo-3665444.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "intent_template": "Buy {{item}} {{condition}}.", + "intent": "Buy a desktop computer 
which has a CPU of the same brand as the CPU in the image.", + "instantiation_dict": { + "item": "a desktop computer", + "condition": "which has a CPU of the same brand as the CPU in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "AMD", + "Desktop |OR| PC" + ] + } + } + ] + }, + "intent_template_id": 109, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 344, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/4705617/pexels-photo-4705617.jpeg", + "intent_template": "Buy {{item}} {{condition}}.", + "intent": "Buy the cheapest battery which is the same type as the boy is holding in the image.", + "instantiation_dict": { + "item": "the cheapest battery", + "condition": "which is the same type as the boy is holding in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B002M6WY72", + "8.59" + ] + } + } + ] + }, + "intent_template_id": 109, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "__SHOPPING__/petsafe-9-volt-replacement-alkaline-battery-compatible-with-petsafe-9v-battery-operated-products-pac11-12067.html" + }, + { + "sites": [ + "shopping", + "wikipedia" + ], + "task_id": 
345, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__ |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Country_calling_codes_map.svg.png.webp", + "geolocation": null, + "image": null, + "intent_template": "Prepend the country code of {{country}} to the phone number of my account profile.", + "intent": "Prepend the country code of South Korea to the phone number of my account profile.", + "instantiation_dict": { + "country": "South Korea" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/address/", + "locator": "document.querySelector(\".box-address-billing > div.box-content > address > a\").outerText.substring(0, 3)", + "required_contents": { + "must_include": [ + "82 |OR| +82" + ] + } + } + ] + }, + "intent_template_id": 113, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 346, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/777059/pexels-photo-777059.jpeg", + "intent_template": "Prepend the country code of {{country}} to the phone number of my account profile.", + "intent": "Prepend the country code of the country shown in the image to the phone number of my account profile.", + "instantiation_dict": { + "country": "the country shown in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/address/", + "locator": "document.querySelector(\".box-address-billing > div.box-content > address > a\").outerText.substring(0, 3)", + "required_contents": { + "must_include": [ + "65 |OR| 
+65" + ] + } + } + ] + }, + "intent_template_id": 113, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "Singapore +65" + }, + { + "sites": [ + "shopping" + ], + "task_id": 347, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "image": "https://images.pexels.com/photos/5967048/pexels-photo-5967048.jpeg", + "intent_template": "Prepend the country code of {{country}} to the phone number of my account profile.", + "intent": "Prepend the country code of the country of the map show in the image to the phone number of my account profile.", + "instantiation_dict": { + "country": "the country of the map show in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/address/", + "locator": "document.querySelector(\".box-address-billing > div.box-content > address > a\").outerText.substring(0, 2)", + "required_contents": { + "must_include": [ + "7 |OR| +7" + ] + } + } + ] + }, + "intent_template_id": 113, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "Russia +7" + }, + { + "sites": [ + "shopping" + ], + "task_id": 348, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?q=travel+adapter", + "geolocation": null, + "image": "https://images.pexels.com/photos/16075421/pexels-photo-16075421.jpeg", + "intent_template": "Empty my cart first and add all the items (in any color) from this page to my shopping cart which {{condition}}.", + "intent": "Empty my cart first and add all the items (in any color) from this page to my shopping cart which is compatible with the socket in the image.", + "instantiation_dict": { + "condition": "is compatible with the 
socket in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Key Power 230-Watt Step Down 220V to 110V Voltage Converter & International Travel Adapter", + "USB Charger, Charging Block CIQILY 5-Pack 1A/5V USB Power Home Travel Adapter Wall Charger Cube Brick Box Base Head Compatible for Phone X 8 7 6 Plus 5S, iPad, Samsung, LG, Moto,Tablet, Android Phone", + "OEM HTC USB Travel Charger Adapter U250 / CNR6300 / 79H00095-14M", + "Universal Power Supply Notebook Laptop Charger 120 W car Home Laptop Adapter Plug for car/Home/Travel with overheating/Overload/Short Circuit Protection US Plug", + "SCOOFEX Universal Travel Adapter, International Power Electric Outlet Converters with USB Ports - Wall Charger AC Plug Type C Type A Type G Type I for US/EU/UK/AU/Asia/China/Japan 150+Countries", + "130.73 |OR| 118.74" + ] + } + } + ] + }, + "intent_template_id": 114, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "MacBook Charger Case Cover is ambiguous: it's actually a case, which is why we consider both 130.73 or 118.74 to be correct." 
+ }, + { + "sites": [ + "shopping" + ], + "task_id": 349, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?q=travel+adapter", + "geolocation": null, + "image": "https://images.pexels.com/photos/6987724/pexels-photo-6987724.jpeg", + "intent_template": "Empty my cart first and add all the items (in any color) from this page to my shopping cart which {{condition}}.", + "intent": "Empty my cart first and add all the items (in any color) from this page to my shopping cart which is compatible with the socket in the image.", + "instantiation_dict": { + "condition": "is compatible with the socket in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Key Power 230-Watt Step Down 220V to 110V Voltage Converter & International Travel Adapter/Power Converter with Type C Port 18W - [Use for USA Appliance Overseas in Europe, AU, UK, Ireland, etc.]", + "Europe Travel Adapter, Ceptics Ultra Compact Dual USB Power Plug - for European Type C - 3 Inputs - iPhone, Laptop, Galaxy, Cell Phones, Camera Chargers, iWatch & More - CTU-9C", + "SCOOFEX Universal Travel Adapter, International Power Electric Outlet Converters with USB Ports - Wall Charger AC Plug Type C Type A Type G Type I for US/EU/UK/AU/Asia/China/Japan 150+Countries", + "74.15" + ] + } + } + ] + }, + "intent_template_id": 114, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 350, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=+screwdriver#", + "geolocation": null, + "image": 
"https://images.pexels.com/photos/6208704/pexels-photo-6208704.jpeg", + "intent_template": "Buy {{item}} {{condition}}.", + "intent": "Buy a screwdriver which is compatible with the screw in the image from this page.", + "instantiation_dict": { + "item": "a screwdriver", + "condition": "which is compatible with the screw in the image from this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B0968D6PC1" + ] + } + } + ] + }, + "intent_template_id": 109, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 351, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=+screwdriver#", + "geolocation": null, + "image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/LR44_Button_Cell_Battery_IEC_Standard_Version.jpg.webp", + "intent_template": "Buy {{item}} {{condition}}.", + "intent": "Buy a battery with the same size and voltage as the image.", + "instantiation_dict": { + "item": "a battery", + "condition": "with the same size and voltage as the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "LR44" + ] + } + } + ] + }, + "intent_template_id": 109, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + 
"task_id": 352, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/?q=iphone+to+3.5", + "geolocation": null, + "intent_template": "Please add to my shopping cart all the items from this page that can connect these devices from the two images.", + "intent": "Please add to my shopping cart all the items from this page that can connect these devices from the two images.", + "image": [ + "https://images.pexels.com/photos/14666017/pexels-photo-14666017.jpeg", + "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B06XWXR6M9.0.jpg" + ], + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "[Apple MFi Certified] Headphone Adapter for iPhone, Belcompany 2 Pack Lightning to 3.5mm AUX Earphone Audio Stereo Converter Compatible with iPhone 13/12/11/XS/XR/X/iPad, Support Call & Music Control" + ] + } + } + ] + }, + "intent_template_id": 105, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 353, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry.html?p=12", + "geolocation": null, + "intent_template": "I really like these {{product}}, but I am not satisfied with the color. Could you find me an item on this page for these {{product}} with a {{shadeChoice}} shade or color?", + "intent": "I really like these shoes, but I am not satisfied with the color. 
Could you find me an item on this page for these shoes with a darker shade or color?", + "image": "https://images.pexels.com/photos/3636684/pexels-photo-3636684.jpeg", + "instantiation_dict": { + "product": "shoes", + "shadeChoice": "darker" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/vans-old-skool-black-white-skate-vn-0d3hy28-mens-us-9-5-uk-8-5-eur-42-5.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 115, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 354, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Find me {{category}} that are of the same brand as the product in this picture. It needs to {{description}}.", + "intent": "Find me shoes that are of the same brand as the product in this picture. It needs to be less than 70 dollars.", + "image": "https://images.pexels.com/photos/14544746/pexels-photo-14544746.jpeg", + "instantiation_dict": { + "category": "shoes", + "description": "be less than 70 dollars" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "lambda:(() => { res = parseFloat(document.querySelector(\"#maincontent > div.columns > div > div.product-info-main > div.product-info-price > div.price-box.price-final_price > span > span\").outerText.substr(1)); return res ? 
res : 0; })()", + "required_contents": { + "required_values": [ + "< 70" + ] + } + }, + { + "url": "last", + "locator": "lambda:(() => { try { return document.querySelector('#maincontent > div.page-title-wrapper.product > h1 > span').textContent } catch (e) { return '' }})()", + "required_contents": { + "must_include": [ + "Croc" + ] + } + } + ] + }, + "intent_template_id": 116, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 355, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sports-outdoors/exercise-fitness.html?p=6", + "geolocation": null, + "intent_template": "I am a fan of {{organization}}. Can you find an image address of a {{product}} with their logo that I can wear to support them?", + "intent": "I am a fan of the Minnesota Vikings. Can you find an image address of a shirt with their logo that I can wear to support them?", + "instantiation_dict": { + "organization": "the Minnesota Vikings", + "product": "shirt" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B07BCKK3HS.0.jpg |OR| B07BCKK3HS.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07BCKK3HS.0.jpg" + }, + "intent_template_id": 117, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 356, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/men/clothing.html?p=16", + "geolocation": null, + "intent_template": "Find me {{category}} that is of the same brand as the product in this image. 
It needs to {{description}}.", + "intent": "Find me a hoodie that is of the same brand as the product in this image. It needs to have the brand logo in multicolor.", + "image": "https://images.pexels.com/photos/3819969/pexels-photo-3819969.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "category": "a hoodie", + "description": "have the brand logo in multicolor" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/coca-cola-horizon-stripe-logo-men-s-hooded-sweatshirt-white.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 118, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 357, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?p=10&q=socks", + "geolocation": null, + "intent_template": "I want {{product}} that are {{colors}}. Can you find an image address of {{target}}?", + "intent": "I want socks that are green. 
Can you find an image address of green socks from this page?", + "instantiation_dict": { + "product": "socks", + "colors": "green", + "target": "green socks from this page" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B07BD2CV35.0.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B07BD2CV35.0.jpg" + }, + "intent_template_id": 119, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 358, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?p=6&q=korean", + "geolocation": null, + "intent_template": "Buy {{product}} with {{detail}} on the packaging.", + "intent": "Buy snacks with a cute animal on the packaging.", + "instantiation_dict": { + "product": "snacks", + "detail": "a cute animal" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B094C9G7G2" + ] + } + } + ] + }, + "intent_template_id": 120, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 359, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/food-beverage-gifts/snack-gifts.html?p=14", + "geolocation": null, + "intent_template": "Buy the {{product}} with {{detail}} on the packaging (in any size).", + "intent": "Buy 
the snacks with horses on the packaging (in any size).", + "instantiation_dict": { + "product": "snacks", + "detail": "horses" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B0095MWPB6" + ] + } + } + ] + }, + "intent_template_id": 121, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 360, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/food-beverage-gifts/snack-gifts.html?p=23", + "geolocation": null, + "intent_template": "Buy the {{product}} with {{detail}} on the packaging (in any size).", + "intent": "Buy the snacks with a tiger on the packaging (in any size).", + "instantiation_dict": { + "product": "snacks", + "detail": "a tiger" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B08LBRN17G" + ] + } + } + ] + }, + "intent_template_id": 121, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 361, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/snacks-sweets.html?p=8", + "geolocation": null, + "intent_template": "Buy the {{product}} with {{detail}} on the packaging (in any size).", + "intent": "Buy the 
snacks with an owl on the packaging (in any size).", + "instantiation_dict": { + "product": "snacks", + "detail": "an owl" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B086RKGJPR" + ] + } + } + ] + }, + "intent_template_id": 121, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 362, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/sport-specific-clothing.html?p=5", + "geolocation": null, + "intent_template": "Buy the {{product}} with {{detail}}.", + "intent": "Buy the shorts with a graphic print of a clock.", + "instantiation_dict": { + "product": "shorts", + "detail": "a graphic print of a clock" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B093BSCCZ7" + ] + } + } + ] + }, + "intent_template_id": 122, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 363, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/clothing-shoes-jewelry/men/clothing.html?p=13", + "geolocation": null, + "intent_template": "Given the image, buy the {{product}} which contain the same object in the image on its design.", + "intent": "Given the image, 
buy the shorts which contain the same object in the image on its design.", + "instantiation_dict": { + "product": "shorts" + }, + "image": "https://images.pexels.com/photos/1108099/pexels-photo-1108099.jpeg?auto=compress&cs=tinysrgb&w=800", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B09S3QYWJQ" + ] + } + } + ] + }, + "intent_template_id": 123, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 364, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sports-outdoors/exercise-fitness.html?p=6", + "geolocation": null, + "intent_template": "I am a fan of {{organization}}. Can you find an image address of a {{product}} with their logo that I can wear to support them?", + "intent": "I am a fan of the Green Bay Packers. 
Can you find an image address of a shirt with their logo that I can wear to support them?", + "instantiation_dict": { + "organization": "the Green Bay Packers", + "product": "shirt" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no Packers shirts available on this page.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 117, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 365, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/home-kitchen.html", + "geolocation": null, + "intent_template": "Buy the {{product}} with {{detail}}.", + "intent": "Buy the TV with the animal from the image on its display.", + "instantiation_dict": { + "product": "TV", + "detail": "the animal from the image on its display" + }, + "image": "https://images.pexels.com/photos/17581924/pexels-photo-17581924/free-photo-of-orca-in-zoo.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B089GT7Z5Z" + ] + } + } + ] + }, + "intent_template_id": 122, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 366, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games.html?p=4", + "geolocation": null, + "intent_template": "Buy the {{product}} with 
{{detail}}.", + "intent": "Buy the controller with the cherry blossom design.", + "instantiation_dict": { + "product": "controller", + "detail": "the cherry blossom design" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B08N1FRB5X" + ] + } + } + ] + }, + "intent_template_id": 122, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 367, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/patio-lawn-garden.html", + "geolocation": null, + "intent_template": "Buy the {{product}} with {{detail}} on the packaging (in any size).", + "intent": "Buy the item with the tomato on the packaging (in any size).", + "instantiation_dict": { + "product": "item", + "detail": "the tomato" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B001B2SNC8" + ] + } + } + ] + }, + "intent_template_id": 121, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 368, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/cell-phones-accessories/cell-phones.html", + "geolocation": null, + "intent_template": "Buy the {{product}} with {{detail}} on the display image (in any color).", + "intent": 
"Buy the phone with the skateboarder on the display image (in any color).", + "instantiation_dict": { + "product": "phone", + "detail": "the skateboarder" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B096RX86Y2" + ] + } + } + ] + }, + "intent_template_id": 124, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 369, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} that {{relation}} in the image{{detail}}.", + "intent": "Buy me a phone (from the \"Cell Phones\" category) that looks the most like the items in the image.", + "image": "https://images.pexels.com/photos/159644/art-supplies-brushes-rulers-scissors-159644.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "phone (from the \"Cell Phones\" category)", + "relation": "looks the most like the items", + "detail": "" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07NSB9ZTZ" + ] + } + } + ] + }, + "intent_template_id": 125, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 370, + "require_login": true, + "storage_state": 
"./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} that {{relation}} in the image{{detail}}.", + "intent": "Buy me a phone (from the \"Cell Phones\" category) that looks the most like the items in the image.", + "image": "https://images.pexels.com/photos/3954444/pexels-photo-3954444.png?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "phone (from the \"Cell Phones\" category)", + "relation": "looks the most like the items", + "detail": "" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07NVKVYZR" + ] + } + } + ] + }, + "intent_template_id": 125, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 371, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Search for \"{{term}}\" and buy me a {{product}} with {{detail}}.", + "intent": "Search for \"hoodie\" and buy me a hoodie (in any color) with a jack-o-lantern design for less than 10 dollars.", + "instantiation_dict": { + "term": "hoodie", + "product": "hoodie (in any color)", + "detail": "a jack-o-lantern design for less than 10 dollars" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B09G2YFQHF" + ] + } + } + ] + }, + 
"intent_template_id": 126, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 372, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} that {{relation}} in the image{{detail}}.", + "intent": "Buy me a charging cable (from the \"Chargers & Power Adapters\" category) in any color that looks the most like the items in the image.", + "image": "https://images.pexels.com/photos/10593653/pexels-photo-10593653.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "charging cable (from the \"Chargers & Power Adapters\" category) in any color", + "relation": "looks the most like the items", + "detail": "" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B081WY776W" + ] + } + } + ] + }, + "intent_template_id": 125, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 373, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "What is the color of the {{type}} item in the \"{{product_category}}\" category?", + "instantiation_dict": { + "type": "cheapest car charger", + "product_category": "Chargers & Power Adapters" + }, + "intent": "What is the color of the cheapest car charger item in the \"Chargers & Power Adapters\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + 
"reference_answers": { + "must_include": [ + "black" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "black" + }, + "intent_template_id": 6, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 374, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} with {{detail}}.", + "intent": "Buy me a phone case (from the \"Basic Cases\" category) with a Barbie theme between 12-14 dollars.", + "instantiation_dict": { + "product": "phone case (from the \"Basic Cases\" category)", + "detail": "a Barbie theme between 12-14 dollars" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B09NJKPD3N" + ] + } + } + ] + }, + "intent_template_id": 127, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 375, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} with {{detail}}.", + "intent": "Buy me a pink phone case for my iPhone 11 6.1\" with a Stitch graphic from the \"Basic Cases\" category.", + "instantiation_dict": { + "product": "pink phone case for my iPhone 11 6.1\"", + "detail": "a Stitch graphic from the \"Basic Cases\" category" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + 
"program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07Q17MK1F", + "iPhone 11 6.1" + ] + } + } + ] + }, + "intent_template_id": 127, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 376, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} with {{detail}}.", + "intent": "Buy me a black phone case (from the \"Basic Cases\" category) with a Pikachu graphic.", + "instantiation_dict": { + "product": "black phone case (from the \"Basic Cases\" category)", + "detail": "a Pikachu graphic" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B09Q2B4X8S" + ] + } + } + ] + }, + "intent_template_id": 127, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 377, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} with {{detail}}.", + "intent": "Buy me a red phone case with an Iron Man graphic.", + "instantiation_dict": { + "product": "red phone case", + "detail": "an Iron Man graphic" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07P9R993T", + "Red" + ] + } + } + ] + }, + "intent_template_id": 127, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 378, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am a fan of {{organization}}. Can you find a url of an image of an item in the \"{{product_category}}\" category I can buy to support them?", + "intent": "I am a fan of the Atlanta Falcons. Can you find a url of an image of an item in the \"Playstation 4\" category I can buy to support them?", + "instantiation_dict": { + "organization": "the Atlanta Falcons", + "product_category": "Playstation 4" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no Falcons items available in this category.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 128, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 379, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am a fan of {{organization}}. Can you find a url of an image of an item in the \"{{product_category}}\" category I can buy to support them?", + "intent": "I am a fan of the Cleveland Browns. 
Can you find a url of an image of an item in the \"Playstation 4\" category I can buy to support them?", + "instantiation_dict": { + "organization": "the Cleveland Browns", + "product_category": "Playstation 4" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no Browns items available in this category.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 128, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 380, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am a fan of {{organization}}. Can you find a url of an image of an item in the \"{{product_category}}\" category I can buy to support them?", + "intent": "I am a fan of the Buffalo Bills. Can you find a url of an image of an item in the \"Playstation 4\" category I can buy to support them?", + "instantiation_dict": { + "organization": "the Buffalo Bills", + "product_category": "Playstation 4" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no Bills items available in this category.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 128, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 381, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am a fan of {{organization}}. 
Can you find a url of an image of an item in the \"{{product_category}}\" category I can buy to support them?", + "intent": "I am a fan of the Miami Dolphins. Can you find a url of an image of an item in the \"Playstation 4\" category I can buy to support them?", + "instantiation_dict": { + "organization": "the Miami Dolphins", + "product_category": "Playstation 4" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "B07583CVJW.0.jpg |OR| B07583CVJW.1.jpg" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__SHOPPING__/media/catalog/product/cache/89ff578b9cd87e0600daac45c9e1ea98/B/0/B07583CVJW.0.jpg" + }, + "intent_template_id": 128, + "reasoning_difficulty": "hard", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 382, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/xbox-one.html?p=9", + "geolocation": null, + "intent_template": "Buy the {{product}} on the page {{detail}}.", + "intent": "Buy the controller (in any color) on the page with a black gradient.", + "instantiation_dict": { + "product": "controller (in any color)", + "detail": "with a black gradient" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B075JFXXTM" + ] + } + } + ] + }, + "intent_template_id": 129, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 383, + "require_login": true, + "storage_state": 
"./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/xbox-one.html?p=13", + "geolocation": null, + "intent_template": "Buy the {{product}} on the page {{detail}}.", + "intent": "Buy the item on the page with Rick and Morty themes.", + "instantiation_dict": { + "product": "item", + "detail": "with Rick and Morty themes" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B081ZNFZZJ" + ] + } + } + ] + }, + "intent_template_id": 129, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 384, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the {{product}} with {{detail}}.", + "intent": "Buy the item with a scared Mario Bros character in the \"Nintendo Switch\" category.", + "instantiation_dict": { + "product": "item", + "detail": "a scared Mario Bros character in the \"Nintendo Switch\" category" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07SK4W1VJ" + ] + } + } + ] + }, + "intent_template_id": 122, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 385, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you buy the {{item}} in the \"{{product_category}}\" category?", + "instantiation_dict": { + "item": "cheapest beer that comes in a box", + "product_category": "Alcoholic Beverages" + }, + "intent": "Can you buy the cheapest beer that comes in a box in the \"Alcoholic Beverages\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B00PLXE90I" + ] + } + } + ] + }, + "intent_template_id": 130, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 386, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you buy the {{item}} in the \"{{product_category}}\" category?", + "instantiation_dict": { + "item": "incorrectly labeled Bud Light", + "product_category": "Alcoholic Beverages" + }, + "intent": "Can you buy the incorrectly labeled Bud Light in the \"Alcoholic Beverages\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B00U56P3AQ" + ] + } + } + ] + }, + "intent_template_id": 130, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 387, + "require_login": true, + "storage_state": 
"./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you buy the {{item}} in the \"{{product_category}}\" category?", + "instantiation_dict": { + "item": "cheapest box wine", + "product_category": "Alcoholic Beverages" + }, + "intent": "Can you buy the cheapest box wine in the \"Alcoholic Beverages\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B01NBFKHYA" + ] + } + } + ] + }, + "intent_template_id": 130, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 388, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/shave-hair-removal/men-s.html", + "geolocation": null, + "intent_template": "Buy the {{product}} on the page {{detail}}.", + "instantiation_dict": { + "product": "pink trimmer for women", + "detail": "" + }, + "intent": "Buy the pink trimmer for women on the page .", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B08WKDNLXV" + ] + } + } + ] + }, + "intent_template_id": 129, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "comments": "", + "overall_difficulty": "easy" + }, + { + "sites": [ + "shopping" + ], + "task_id": 389, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you buy the {{item}} in the \"{{product_category}}\" category?", + "instantiation_dict": { + "item": "most expensive item (in any size) that uses a mannequin in its display", + "product_category": "Virtual Reality (VR) Headsets" + }, + "intent": "Can you buy the most expensive item (in any size) that uses a mannequin in its display in the \"Virtual Reality (VR) Headsets\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B09NBRVLHN" + ] + } + } + ] + }, + "intent_template_id": 130, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 390, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Rate {{product}} in the {{category}} category with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "the controller with a happy couple on it", + "category": "\"Legacy Systems > Xbox Systems\"", + "num_star": 5, + "nickname": "EmLo" + }, + "intent": "Rate the controller with a happy couple on it in the \"Legacy Systems > Xbox Systems\" category with 5 stars, using my nickname EmLo?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B09P819K5N')", + "required_contents": { + "must_include": [ + "100" + ] + } + }, + { + "url": "last", + "locator": 
"func:shopping_get_sku_latest_review_author('B09P819K5N')", + "required_contents": { + "must_include": [ + "EmLo" + ] + } + } + ] + }, + "intent_template_id": 131, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 391, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Rate {{product}} in the {{category}} category with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "the controller with a sideways display image that is not a stock photo", + "category": "\"Legacy Systems > Xbox Systems\"", + "num_star": 0, + "nickname": "EmLo" + }, + "intent": "Rate the controller with a sideways display image that is not a stock photo in the \"Legacy Systems > Xbox Systems\" category with 0 stars, using my nickname EmLo?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B00BH4V6HE')", + "required_contents": { + "must_include": [ + "0" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B00BH4V6HE')", + "required_contents": { + "must_include": [ + "EmLo" + ] + } + } + ] + }, + "intent_template_id": 131, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 392, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you buy the {{item}} in the \"{{product_category}}\" category?", + "instantiation_dict": { + "item": "purple and blue controllers with lightning and galaxy designs", + "product_category": 
"Legacy Systems > Playstation Systems" + }, + "intent": "Can you buy the purple and blue controllers with lightning and galaxy designs in the \"Legacy Systems > Playstation Systems\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07C85HK2J", + "Blue+ Purple" + ] + } + } + ] + }, + "intent_template_id": 130, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 393, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/legacy-systems/playstation-systems.html?p=12", + "geolocation": null, + "intent_template": "Buy the {{product}} on the page {{detail}}.", + "intent": "Buy the item on the page with Toys R Us on the packaging.", + "instantiation_dict": { + "product": "item", + "detail": "with Toys R Us on the packaging" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B004DWOSTU" + ] + } + } + ] + }, + "intent_template_id": 129, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 394, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/legacy-systems/playstation-systems.html?p=14", + "geolocation": null, + "intent_template": "Buy the 
{{product}} on the page {{detail}}.", + "intent": "Buy the PSP on the page which has a case in its display image.", + "instantiation_dict": { + "product": "PSP", + "detail": "which has a case in its display image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B000YQL3G0" + ] + } + } + ] + }, + "intent_template_id": 129, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 395, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/legacy-systems/nintendo-systems.html?p=1", + "geolocation": null, + "intent_template": "Buy the {{product}} on the page {{detail}}.", + "intent": "Buy the Animal Crossing product on the page with a character showing its teeth.", + "instantiation_dict": { + "product": "Animal Crossing product", + "detail": "with a character showing its teeth" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B01D92FIRK" + ] + } + } + ] + }, + "intent_template_id": 129, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 396, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/legacy-systems/nintendo-systems.html?p=4", + 
"geolocation": null, + "intent_template": "Buy the {{product}} on the page {{detail}}.", + "intent": "Buy the item on the page with a pink background on the display image (in any edition).", + "instantiation_dict": { + "product": "item", + "detail": "with a pink background on the display image (in any edition)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B001IAP45W" + ] + } + } + ] + }, + "intent_template_id": 129, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 397, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/nintendo-switch.html?p=2", + "geolocation": null, + "intent_template": "Buy the {{product}} on the page {{detail}}.", + "intent": "Buy the item on the page with a banana theme.", + "instantiation_dict": { + "product": "item", + "detail": "with a banana theme" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B0931NN4PR" + ] + } + }, + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "Fortnite Fleece Bundle" + ] + } + } + ] + }, + "intent_template_id": 129, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": 
"" + }, + { + "sites": [ + "shopping" + ], + "task_id": 398, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/nintendo-switch.html?p=3", + "geolocation": null, + "intent_template": "Given the image, buy the {{product}} which contain the same object in the image on its design.", + "intent": "Given the image, buy the item on this page (in any style) which contain the same object in the image on its design.", + "instantiation_dict": { + "product": "item on this page (in any style)" + }, + "image": "https://images.pexels.com/photos/207142/pexels-photo-207142.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B0786JC6VW" + ] + } + } + ] + }, + "intent_template_id": 123, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 399, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/", + "geolocation": null, + "intent_template": "Can you buy the {{item}} in the \"{{product_category}}\" category?", + "instantiation_dict": { + "item": "cheapest cereal with a graphic character on the box", + "product_category": "Cereals" + }, + "intent": "Can you buy the cheapest cereal with a graphic character on the box in the \"Cereals\" category?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", 
+ "required_contents": { + "must_include": [ + "B07QFH3BGS" + ] + } + } + ] + }, + "intent_template_id": 130, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "medium" + }, + { + "sites": [ + "shopping" + ], + "task_id": 400, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/video-games/legacy-systems/nintendo-systems.html?p=4", + "geolocation": null, + "intent_template": "Buy the {{product}} with {{detail}}.", + "intent": "Buy the Nunchuk controller (in any color) on the page with a shadow outline on the display image.", + "instantiation_dict": { + "product": "Nunchuk controller (in any color) on the page", + "detail": "a shadow outline on the display image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B0094X2066" + ] + } + } + ] + }, + "intent_template_id": 122, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 401, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/breads-bakery/cakes.html?p=2", + "geolocation": null, + "intent_template": "Buy the {{product}} on the page {{detail}}.", + "intent": "Buy the cake on the page with a green turtle.", + "instantiation_dict": { + "product": "cake", + "detail": "with a green turtle" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no cakes with green turtles available on 
this page.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 129, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 402, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/breads-bakery/cakes.html?p=2", + "geolocation": null, + "intent_template": "Buy the {{product}} on the page {{detail}}.", + "intent": "Buy the cakes on the page with Korean packaging.", + "instantiation_dict": { + "product": "cakes", + "detail": "with Korean packaging" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There are no Korean cakes on this page.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 129, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 403, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/breads-bakery/cakes.html?p=4", + "geolocation": null, + "intent_template": "Buy the {{product}} on the page {{detail}}.", + "intent": "Buy the cake on the page with a kitchen countertop in the display image.", + "instantiation_dict": { + "product": "cake", + "detail": "with a kitchen countertop in the display image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07TGKXHFY" + ] + } + } + ] + }, + "intent_template_id": 129, 
+ "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 404, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/snacks-sweets/snack-foods.html?p=33&product_list_order=price", + "geolocation": null, + "intent_template": "Buy the {{product}} on the page {{detail}}.", + "intent": "Buy the snacks on the page with a basketball player on the packaging.", + "instantiation_dict": { + "product": "snacks", + "detail": "with a basketball player on the packaging" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B08M1YSJ7F" + ] + } + } + ] + }, + "intent_template_id": 129, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 405, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} that {{relation}} in the image{{detail}}.", + "intent": "Buy me a product that has the same function as the items in the image from the \"Cereals\" category.", + "image": "https://images.pexels.com/photos/187498/pexels-photo-187498.jpeg?auto=compress&cs=tinysrgb&w=800", + "instantiation_dict": { + "product": "product", + "relation": "has the same function as the items", + "detail": " from the \"Cereals\" category" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B09JV2P5FH" + ] + } + } + ] + }, + "intent_template_id": 125, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 406, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the {{product}} with {{detail}}.", + "intent": "Buy the cereal with a beach volleyball player on the packaging.", + "instantiation_dict": { + "product": "cereal", + "detail": "a beach volleyball player on the packaging" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07J4ZTLWG" + ] + } + } + ] + }, + "intent_template_id": 122, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 407, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sports-outdoors/fan-shop.html", + "geolocation": null, + "intent_template": "Buy the {{product}} with {{detail}}.", + "intent": "Buy the top on the page with Boston Celtics colors.", + "instantiation_dict": { + "product": "top on the page", + "detail": "Boston Celtics colors" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": 
"document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07B313S75" + ] + } + } + ] + }, + "intent_template_id": 122, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 408, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} that has a skyline of the city shown in this image.", + "intent": "Buy me a pair of large shorts that has a skyline of the city shown in this image.", + "image": "https://images.pexels.com/photos/3964406/pexels-photo-3964406.jpeg?auto=compress&cs=tinysrgb&w=800", + "instantiation_dict": { + "product": "pair of large shorts" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07MV1JFND", + "Large" + ] + } + } + ] + }, + "intent_template_id": 132, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 409, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} that has a skyline of the city shown in this image.", + "intent": "Buy me a large hoodie that has a skyline of the city shown in this image.", + "image": "https://images.pexels.com/photos/419235/pexels-photo-419235.jpeg?auto=compress&cs=tinysrgb&w=800", + "instantiation_dict": { + "product": "large hoodie" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + 
], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07XT2SC56", + "Large" + ] + } + } + ] + }, + "intent_template_id": 132, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 410, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} that references a sports team from this city.", + "intent": "Buy me a video game controller that references a sports team from this city.", + "image": "https://images.pexels.com/photos/2539395/pexels-photo-2539395.jpeg?auto=compress&cs=tinysrgb&w=800", + "instantiation_dict": { + "product": "video game controller" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B01N57KNX7" + ] + } + } + ] + }, + "intent_template_id": 133, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 411, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} that references the city in the image.", + "intent": "Buy me a size small shirt under 20 dollars that references the city in the image.", + "image": 
"https://images.pexels.com/photos/1239162/pexels-photo-1239162.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "size small shirt under 20 dollars" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07R75ZV6F" + ] + } + }, + { + "url": "__SHOPPING__/checkout/cart", + "locator": "func:get_query_text(__page__, \"#shopping-cart-table\")", + "required_contents": { + "must_include": [ + "Small" + ] + } + } + ] + }, + "intent_template_id": 134, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 412, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} that references a sports team from this city.", + "intent": "Buy me a pair of shorts over 50 dollars (in any size) that references a sports team from this city.", + "image": "https://images.pexels.com/photos/1239162/pexels-photo-1239162.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "pair of shorts over 50 dollars (in any size)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07MV1MZWF" + ] + } + } + ] + }, + "intent_template_id": 133, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + 
"comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 413, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy me a {{product}} that references the city in the image.", + "intent": "Buy me a pair of small shorts under 30 dollars that references the city in the image.", + "image": "https://images.pexels.com/photos/1239162/pexels-photo-1239162.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2", + "instantiation_dict": { + "product": "pair of small shorts under 30 dollars" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07PT4DPK2", + "Small" + ] + } + } + ] + }, + "intent_template_id": 134, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 414, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/oral-care/toothbrushes-accessories.html?p=4", + "geolocation": null, + "intent_template": "Rate {{product}} in the {{category}} category with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "the toothbrush on this page with a cupcake on it", + "category": "Toothbrushes & Accessories", + "num_star": 5, + "nickname": "EmLo" + }, + "intent": "Rate the toothbrush on this page with a cupcake on it in the Toothbrushes & Accessories category with 5 stars, using my nickname EmLo?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + 
"locator": "func:shopping_get_sku_latest_review_rating('B09P55GY2P')", + "required_contents": { + "must_include": [ + "100" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B09P55GY2P')", + "required_contents": { + "must_include": [ + "EmLo" + ] + } + } + ] + }, + "intent_template_id": 131, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 415, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/beauty-personal-care/oral-care/toothbrushes-accessories.html?p=5", + "geolocation": null, + "intent_template": "Rate {{product}} in the {{category}} category with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "the toothbrush on this page with a winking character on it", + "category": "Toothbrushes & Accessories", + "num_star": 5, + "nickname": "EmLo" + }, + "intent": "Rate the toothbrush on this page with a winking character on it in the Toothbrushes & Accessories category with 5 stars, using my nickname EmLo?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B09S3PK7R3')", + "required_contents": { + "must_include": [ + "100" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B09S3PK7R3')", + "required_contents": { + "must_include": [ + "EmLo" + ] + } + } + ] + }, + "intent_template_id": 131, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "comments": "", + "overall_difficulty": "hard" + }, + { + "sites": [ + "shopping" + ], + "task_id": 416, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/beauty-personal-care/personal-care/deodorants-antiperspirants.html", + "geolocation": null, + "intent_template": "Buy {{product}} with {{detail}} on the packaging.", + "intent": "Buy deoderant on the page with the phrase 'extra extra dry' on the packaging.", + "instantiation_dict": { + "product": "deoderant on the page", + "detail": "the phrase 'extra extra dry'" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B01IADXR9E" + ] + } + } + ] + }, + "intent_template_id": 120, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 417, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy {{product}} with {{detail}} on the packaging.", + "intent": "Buy the cheapest deoderant from the \"Deodorants & Antiperspirants\" category with the phrase 'killer' on the packaging.", + "instantiation_dict": { + "product": "the cheapest deoderant from the \"Deodorants & Antiperspirants\" category", + "detail": "the phrase 'killer'" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B073VBXY7L" + ] + } + } + ] + }, + "intent_template_id": 120, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + 
], + "task_id": 418, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy {{product}} with {{detail}} on the packaging.", + "intent": "Buy the cheapest item from the \"Fragrance > Men's\" category with plastic on the packaging.", + "instantiation_dict": { + "product": "the cheapest item from the \"Fragrance > Men's\" category", + "detail": "plastic" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07BFHHG97" + ] + } + } + ] + }, + "intent_template_id": 120, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 419, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy {{product}} with {{detail}} on the packaging.", + "intent": "Buy the most expensive men's fragrance with an animal on the packaging.", + "instantiation_dict": { + "product": "the most expensive men's fragrance", + "detail": "an animal" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B01M6BJEOJ" + ] + } + } + ] + }, + "intent_template_id": 120, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 420, + 
"require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy {{product}} with {{detail}} on the packaging.", + "intent": "Buy the most expensive wine making kit with a happy couple on the packaging.", + "instantiation_dict": { + "product": "the most expensive wine making kit", + "detail": "a happy couple" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B085SPWHJS" + ] + } + } + ] + }, + "intent_template_id": 120, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 421, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Help me to add the cheapest {{color}} {{item}} to my wishlist", + "intent": "Help me to add the cheapest purple phone stand above $8 to my wishlist", + "instantiation_dict": { + "color": "purple", + "item": "phone stand above $8" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Lazy Cell Phone Holder - Universal Phone Holder to Wear Around Neck Lazy Bracket Free Rotating Smart Mobile Phone Mount Stand", + "Purple" + ] + } + } + ] + }, + "intent_template_id": 135, + "reasoning_difficulty": "medium", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + 
"sites": [ + "shopping" + ], + "task_id": 422, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Help me to add the cheapest {{color}} {{item}} to my wishlist", + "intent": "Help me to add the cheapest fully white women's shoes (any size) to my wishlist", + "instantiation_dict": { + "color": "fully white", + "item": "women's shoes (any size)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Cute Sandals for Women Beach Comfort Sandals Casual Daily Slip On Platform Sandals Flat Beach Shoes Dressy Slides", + "A1-white" + ] + } + } + ] + }, + "intent_template_id": 135, + "reasoning_difficulty": "medium", + "visual_difficulty": "easy", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 423, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Help me to add the cheapest {{color}} {{item}} to my wishlist", + "intent": "Help me to add the cheapest white humidifier to my wishlist", + "instantiation_dict": { + "color": "white", + "item": "humidifier" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "4L Humidifiers for Bedroom - Top Fill Air Humidifier for Large Room, Auto Shut-off Cool Mist Humidifier, Essential-Oil Diffuser, LED Display, Constant Humidity, Mist Mode for Home, 
Office, Plants" + ] + } + } + ] + }, + "intent_template_id": 135, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 424, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Please add the cheapest {{description}} {{item}} to my shopping cart", + "intent": "Please add the cheapest anime poster (from the \"Posters & Print\" category) to my shopping cart", + "instantiation_dict": { + "description": "anime", + "item": "poster (from the \"Posters & Print\" category)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "HWEMAKES Demon Slayer Poster Decorative Painting Canvas Wall Art Living Room Posters Bedroom Painting 16x24inch(40x60cm)" + ] + } + } + ] + }, + "intent_template_id": 136, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 425, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Please add the cheapest {{description}} {{item}} to my shopping cart", + "intent": "Please add the cheapest round smartwatch (from the \"Smartwatches\" category) to my shopping cart", + "instantiation_dict": { + "description": "round", + "item": "smartwatch (from the \"Smartwatches\" category)" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + 
"IDEALROYAL Smart Watch,Fitness Tracker with Heart Rate Monitor,IP68 Waterproof, Blood Pressure,Sleep Monitor, Full Touch Screen Smart Watch for Women Men for Android & iOS (Pink)" + ] + } + } + ] + }, + "intent_template_id": 136, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 426, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Please add the cheapest {{description}} {{item}} to my shopping cart", + "intent": "Please add the cheapest standing projection screen to my shopping cart", + "instantiation_dict": { + "description": "standing", + "item": "projection screen" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "EPSELPSC80 - Duet Ultra Portable Projection Screen" + ] + } + } + ] + }, + "intent_template_id": 136, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 427, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food.html?p=7", + "geolocation": null, + "intent_template": "Can you add all the {{items}} {{condition}} to my shopping cart?", + "intent": "Can you add all the items on this page with korean words to my shopping cart?", + "instantiation_dict": { + "items": "items on this page", + "condition": "with korean words" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + 
"required_contents": { + "must_include": [ + "Tofu Broth Seasoning (Seafood)", + "NH WELLHEIM Variety Seoul Tteokbokki Hot& Spicy, Spicy Carbo, and Jjajang Flavors \"Spicy Challenge Box\" (3 COMBO, PACK OF 6)" + ] + } + } + ] + }, + "intent_template_id": 72, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 428, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/catalogsearch/result/index/?p=8&q=chair", + "geolocation": null, + "intent_template": "Can you add all the {{items}} {{condition}} to my shopping cart?", + "intent": "Can you add all the items on this page with wheels to my shopping cart?", + "instantiation_dict": { + "items": "items on this page", + "condition": "with wheels" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Modern Upholstered Tufted Velvet Office Chair Adjustable Vanity Chair Makeup Chair Cute Desk Chair Mid Back Computer Chair Accent Chairs for Bedroom, Blue" + ] + } + } + ] + }, + "intent_template_id": 72, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 429, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/electronics/computers-accessories/computers-tablets.html?p=5", + "geolocation": null, + "intent_template": "Can you add all the {{items}} {{condition}} to my shopping cart?", + "intent": "Can you add all the items on this page with a stylus to my shopping cart?", + "instantiation_dict": { + "items": "items on this page", + "condition": "with a stylus" + }, + "require_reset": false, + 
"eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Navitech 7\" Purple Case/Cover with 360 Rotational Stand & Stylus Pen Compatible with The Hisense Sero Pro 8 / Haier Pad Mini 7.5", + "Toshiba Dynabook Tecra A30-G 13.3\" FHD Business Laptop Computer, Celeron 5205U Processor @ 1.9GHz, 8GB DDR4 RAM, 256GB PCIe SSD, WiFi 6, Bluetooth, Type-C, Windows 10 Pro Education, 64GB Flash Drive" + ] + } + } + ] + }, + "intent_template_id": 72, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 430, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Help me find all items between {{lower}} and {{upper}} that look like the image below and put them in my cart.", + "intent": "Help me find all items between $40 and $50 that look like the image below and put them in my cart.", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B01GBZZA48.0.jpg", + "instantiation_dict": { + "lower": "$40", + "upper": "$50" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "125ft Weather Seal Quad Shield Outdoor 3GHZ RG-6 Coaxial Cable 75 Ohm (Satellite TV or Broadband Internet) Anti Corrosion Brass Connector RG6 Fittings Assembled in USA by PHAT SATELLITE INTL", + "GE RG6 Coaxial Cable, 100 ft. 
F-Type Connectors, Quad Shielded Coax Cable, 3 GHz Digital, In-Wall Rated, Ideal for TV Antenna, DVR, VCR, Satellite, Cable Box, Home Theater, Black, 34842", + "SiriusXM Satellite Radio 75 Foot RG6 Coaxial Cable Antenna Cable Extension Kit Works with All Sirius, XM and SiriusXM Radio Receivers and Boomboxes", + "Ucland RP-SMA Male to SMA Male Adapter Connector RG174 Coaxial Cable for Satellite Television Black 2m", + "55ft TRI-Shield 14AWG 75 Ohm Gel Coated Braid Direct Burial Underground RG-11 Coax HD Cable TV Antenna Weather Seal All Brass CONNECTORS UL ETL Cut to Order Assembled in USA", + "65ft Made in USA Plenum RG-11 Coax 3Ghz HD Cable TV Antenna 14AWG 75 Ohm All Brass CONNECTORS CMP UL ETL Anti-Static Commercial FIRE Retardant Cut to Order Assembled in USA by PHAT SATELLITE INTL", + "90ft TRI-Shield 14AWG 75 Ohm Gel Coated Braid Direct Burial Underground RG-11 Coax HD Cable TV Antenna Weather Seal All Brass CONNECTORS UL ETL Cut to Order Assembled in USA", + "75 Ft Pro Brand RG6 Coax Cable Black Solid Copper Center Conductor Digital 2.25 GHz Satellite HDTV Dish 75 Ohm Shielded Braided Satellite Dish Off-Air HDTV Aerial Antenna Video Jumper Signal", + "GE RG6 Coaxial Cable, 100 ft. 
F-Type Connectors, Quad Shielded Coax Cable, 3 GHz Digital, In-Wall Rated, Ideal for TV Antenna, DVR, VCR, Satellite, Cable Box, Home Theater, Black, 34842", + "Ucland RP-SMA Male to SMA Male Adapter Connector RG174 Coaxial Cable for Satellite Television Black 2m" + ] + } + } + ] + }, + "intent_template_id": 137, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 431, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Help me find all items between {{lower}} and {{upper}} that look like the image below and put them in my cart.", + "intent": "Help me find all items between $10 and $20 that look like the image below and put them in my cart.", + "image": "https://images.pexels.com/photos/239578/pexels-photo-239578.jpeg", + "instantiation_dict": { + "lower": "$10", + "upper": "$20" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "6 Pack Assorted Macarons | Gluten Free | Almond Overload, Blueberry Cheesecake & Chocolate Raspberry", + "12 Pack | Cheesecake Combo French Macarons Value Pack" + ] + } + } + ] + }, + "intent_template_id": 137, + "reasoning_difficulty": "hard", + "visual_difficulty": "medium", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 432, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Help me find all items between {{lower}} and {{upper}} that look like the image below and put them in my cart.", + "intent": "Help me find all items between $30 and $40 that look like the image 
below and put them in my cart.", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07Q9QGBGV.0.jpg", + "instantiation_dict": { + "lower": "$30", + "upper": "$40" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Magic Array Wireless Charger, 10w Ultra-Thin Qi Fast Wireless Charging Pad, Compatible with iPhone X/XS/MAX/8/8 Plus/Galaxy Note 9/S9/S10/S9 Plus/Note 8/S8 Edge More (Adapter NOT Included) (Pink)", + "Magic Array Wireless Charger, 10w Ultra-Thin Qi Fast Wireless Charging Pad, Compatible with iPhone X/XS/MAX/8/8 Plus/Galaxy Note 9/S9/S10/S9 Plus/Note 8/S8 Edge More (Adapter NOT Included) (Brown)" + ] + } + } + ] + }, + "intent_template_id": 137, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 433, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me the cheapest {{property}} version of the thing in this image and add it to my cart please.", + "intent": "Find me the cheapest small-packet version of the thing in this image and add it to my cart please.", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B01NBY6BCJ.0.jpg", + "instantiation_dict": { + "property": "small-packet" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Evaxo Sun-Maid Raisins 24 x 1 oz .#B" + ] + } + } + ] + }, + "intent_template_id": 138, + "reasoning_difficulty": 
"hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 434, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me the cheapest {{property}} version of the thing in this image and add it to my cart please.", + "intent": "Find me the cheapest thick, original version of the thing in this image and add it to my cart please.", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07KKXXM67.0.jpg", + "instantiation_dict": { + "property": "thick, original" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "REESE'S Peanut Butter Cups, Milk Chocolate, Snack Size (Pack of 2 Pounds)" + ] + } + } + ] + }, + "intent_template_id": 138, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 435, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me the cheapest {{property}} version of the thing in this image and add it to my cart please.", + "intent": "Find me the cheapest strawberry version of the thing in this image and add it to my cart please.", + "image": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B08L3MNSCW.1.jpg", + "instantiation_dict": { + "property": "strawberry" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + 
"required_contents": { + "must_include": [ + "Kit kat chocolate strawberry 12 bars 1 bags Japan import" + ] + } + } + ] + }, + "intent_template_id": 138, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 436, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me the cheapest item that costs {{range}} and can {{function}} like in the image shown and add it to my cart.", + "intent": "Find me the cheapest item that costs more than $6 and can project onto a screen like in the image shown and add it to my cart.", + "image": "https://images.pexels.com/photos/2507025/pexels-photo-2507025.jpeg", + "instantiation_dict": { + "range": "more than $6", + "function": "project onto a screen" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Sony VPL-VW1000ES 4K Home Theater ES Projector (2012 Model)" + ] + } + } + ] + }, + "intent_template_id": 139, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 437, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me the cheapest item that costs {{range}} and can {{function}} like in the image shown and add it to my cart.", + "intent": "Find me the cheapest item that costs between $3-4 and can hold up a phone like in the image shown and add it to my cart.", + "image": "https://images.pexels.com/photos/728842/pexels-photo-728842.jpeg", + "instantiation_dict": { + "range": "between $3-4", + 
"function": "hold up a phone" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Wireless Bluetooth Selfie Stick and Tripod Combination - Compatible for Samsung S10 Lite & Most Android/iOS Smart Phones" + ] + } + } + ] + }, + "intent_template_id": 139, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 438, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Navigate to the category where this image is most likely to be found. Then, find me the cheapest item that costs {{range}} and can {{function}} like in the image shown and add it to my cart.", + "intent": "Navigate to the category where this image is most likely to be found. 
Then, find me the cheapest item that costs more than $5 and can charge multiple devices like in the image shown and add it to my cart.", + "image": "https://images.pexels.com/photos/8101107/pexels-photo-8101107.jpeg", + "instantiation_dict": { + "range": "more than $5", + "function": "charge multiple devices" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Philips 6-Outlet Extender Surge Protector, 3-Prong, Wall Adapter Plug, Space Saving Design, 1020J, UL Listed, White, SPS1006WA/37 |OR| J.VOLT 4 Outlet Power Strip, 15A 125V 1875W, 90 Joules, 20-Inch Short Cord with Angled Plug, Small Power Strip Surge Protector, ETL Listed" + ] + } + } + ] + }, + "intent_template_id": 140, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "The second item is a power strip (which may be more suitable for the image), but the first can also charge multiple things." + }, + { + "sites": [ + "shopping" + ], + "task_id": 439, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Navigate to the category where I can find {{item}} that can help me {{function}}. Add the cheapest one to my cart.", + "intent": "Navigate to the category where I can find camera that can help me take photos like this. 
Add the cheapest one to my cart.", + "image": "https://images.pexels.com/photos/1618606/pexels-photo-1618606.jpeg", + "instantiation_dict": { + "item": "camera", + "function": "take photos like this" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Action Camera Ultra HD 4K Action Camera 10m Waterproof 2.0' Screen 1080p Sport Camera Cam Driving Recorder Tachograph HD (Bundle : Camera Add 64GB Card, Color : Blue)" + ] + } + } + ] + }, + "intent_template_id": 141, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 440, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Navigate to the category where I can find {{item}} that can help me {{function}}. Add the cheapest one to my cart.", + "intent": "Navigate to the category where I can find the ingredient that can help me make the drink in the photo. 
Add the cheapest one to my cart.", + "image": "https://images.pexels.com/photos/15125130/pexels-photo-15125130/free-photo-of-coffee-beans-around-coffee-cup.jpeg", + "instantiation_dict": { + "item": "the ingredient", + "function": "make the drink in the photo" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Hills Bros Instant Sugar-Free Double Mocha Cappuccino Mix, Easy to Use, Enjoy Coffeehouse Flavor from Home, Frothy and 8 g of Carbs, 12 Oz" + ] + } + } + ] + }, + "intent_template_id": 141, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 441, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "My kid loves horses, and I want a topper for this. Help me add the least expensive one to my wishlist.", + "intent": "My kid loves horses, and I want a topper for this. 
Help me add the least expensive one to my wishlist.", + "image": "https://images.pexels.com/photos/18955559/pexels-photo-18955559/free-photo-of-a-cupcake-with-strawberry-frosting.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Horse Cupcake Toppers Horse Racing Party Cake Decoration for Horse Theme Birthday Party Supplies SET of 24" + ] + } + } + ] + }, + "intent_template_id": 142, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 442, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Put the least expensive item that has the thing in the first image {{relationship}} the thing in the second image in my wishlist", + "intent": "Put the least expensive item that has the thing in the first image in the shape of the thing in the second image in my wishlist", + "image": [ + "https://images.pexels.com/photos/8680077/pexels-photo-8680077.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "https://images.pexels.com/photos/2036544/pexels-photo-2036544.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1" + ], + "instantiation_dict": { + "relationship": "in the shape of" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Student Flip Phone 
for Ferrari Car-Shape Phone Flip Keypad Car Model Mobile Phone GSM Cell Phone 100-240V, Support SMS, MP3, Camera, Video Playback, Recording, Radio, Bluetooth, Calculator(White)" + ] + } + } + ] + }, + "intent_template_id": 143, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 443, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Put the least expensive item that has the thing in the first image {{relationship}} the thing in the second image in my wishlist", + "intent": "Put the least expensive item that has the thing in the first image imprinted on the thing in the second image in my wishlist", + "image": [ + "https://images.pexels.com/photos/18885866/pexels-photo-18885866/free-photo-of-person-in-deadpool-costume-on-bridge.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "https://images.pexels.com/photos/2323435/pexels-photo-2323435.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1" + ], + "instantiation_dict": { + "relationship": "imprinted on" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/?limit=50", + "locator": "document.querySelector('.products-grid.wishlist').textContent", + "required_contents": { + "must_include": [ + "Vanknight Playstation 4 Dualshock PS4 Controller Skin Vinyl Decals Skins Stickers 2 Pack for PS4 Controller Skins PS4 Skins Deadpool" + ] + } + } + ] + }, + "intent_template_id": 143, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 444, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/health-household/diet-sports-nutrition/nutrition-bars-drinks.html?price=40-50", + "geolocation": null, + "intent_template": "Place the item with {{quality}} on this page into my cart", + "intent": "Place the item with the highest amount of protein per bar on this page into my cart", + "instantiation_dict": { + "quality": "the highest amount of protein per bar" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Garden Of Life, Fit Bar Smores Organic, 1.9 Ounce" + ] + } + } + ] + }, + "intent_template_id": 144, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 445, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/grocery-gourmet-food/breads-bakery/cookies.html?price=80-90", + "geolocation": null, + "intent_template": "Place the item with {{quality}} on this page into my cart", + "intent": "Place the item with the darkest packaging on this page into my cart", + "instantiation_dict": { + "quality": "the darkest packaging" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "B.A.D. FOOD CO The Peanutter - Peanut Butter Cookies | 5.4oz \u2013 12 Pack | , Grain-Free, Gluten-Free, Refined Sugar Free, No-Soy, Dairy-Free, Non-GMO, Paleo Friendly, and Nutrient Dense!" 
+ ] + } + } + ] + }, + "intent_template_id": 144, + "reasoning_difficulty": "easy", + "visual_difficulty": "easy", + "overall_difficulty": "easy", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 446, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/electronics/television-video/televisions.html?p=4&price=0-100", + "geolocation": null, + "intent_template": "Place the item with {{quality}} on this page into my cart", + "intent": "Place the item with the thickest bezels on this page into my cart", + "instantiation_dict": { + "quality": "the thickest bezels" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "SAMSUNG 40 inches LED Smart FDHTV 1080P (Renewed)" + ] + } + } + ] + }, + "intent_template_id": 144, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 447, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/cell-phones-accessories/cases-holsters-sleeves.html?price=10-20", + "geolocation": null, + "intent_template": "Add all the items with {{quality}} on this page into my cart", + "intent": "Add all the items with a strap on this page into my cart", + "instantiation_dict": { + "quality": "a strap" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "IKASEFU Compatible with iPhone 6 Plus/6S Plus Case Glitter Shiny butterfly Rhinestone Floral Pu Leather Diamond Flash Bling Wallet Strap Case with Card Holder 
Magnetic Kickstand Flip Cover,Rose gold", + "Compatible with Samsung Galaxy A51 4G Wallet Case and Tempered Glass Screen Protector Flip Card Holder Cell Accessories Folio Purse Phone Cover for Glaxay A 51 Gaxaly M40S 51A A515F S51 Blue" + ] + } + } + ] + }, + "intent_template_id": 145, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 448, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/office-products/office-furniture-lighting/chairs-sofas.html", + "geolocation": null, + "intent_template": "Add all the items with {{quality}} on this page into my cart", + "intent": "Add all the items with no armrests on this page into my cart", + "instantiation_dict": { + "quality": "no armrests" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Rolling Swivel Stool Chairs with Back Wheels Height Adjustable PU Leather Massage Chairs Round Stools with Wheels for Medical Clinic Salon Home Office Hobby Desk Grey", + "BOWERY HILL Metal 30'' Backless Bar Stool in Black-Antique Gold" + ] + } + } + ] + }, + "intent_template_id": 145, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 449, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/patio-lawn-garden/gardening-lawn-care/plants-seeds-bulbs.html?p=3", + "geolocation": null, + "intent_template": "Add all the items with {{quality}} on this page into my cart", + "intent": "Add all the items with flowers on this page into my cart", + "instantiation_dict": { + "quality": "flowers" + }, + 
"require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Four (4) Orchid Cactus (Epiphyllum) 8\" Fresh Cuttings: 2 for White and 2 for Red Flower", + "Hirt's White Christmas Cactus Plant - Zygocactus - 6\" Pot", + "Hawaiian Red Plumeria Plant Cutting Kanoa Hawaii 1 Pack SK34", + "Knockout Double Pink Rose, 1 Gal" + ] + } + } + ] + }, + "intent_template_id": 145, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 450, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/stackable-clear-shoe-box-front-drop-magnetic-door-dustproof-and-breathable-sneaker-storage-box-shoe-box-organizer-for-women-and-men-fit-up-to-us-size-16.html", + "geolocation": null, + "intent_template": "Find me the cheapest {{item}} with the same {{characteristic}} as the majority of the {{items}} shown in the second image on this page and put it in my cart", + "intent": "Find me the cheapest shoe with the same brand as the majority of the shoes shown in the second image on this page and put it in my cart", + "instantiation_dict": { + "item": "shoe", + "items": "shoes", + "characteristic": "brand" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Nike SB Mens Charge Canvas Canvas Lifestyle Fashion Sneakers Gray 9.5 Medium (D)" + ] + } + } + ] + }, + "intent_template_id": 146, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + 
"task_id": 451, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me the cheapest {{item}} {{condition}} and add one of any size to my shopping cart.", + "intent": "Find me the cheapest shoe with a multi-colored word printed on it and add one of any size to my shopping cart.", + "instantiation_dict": { + "item": "shoe", + "condition": "with a multi-colored word printed on it" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "SWEETSHOPZ Hummingbird Geometric Holographic Color White Active Running Walking Sneakers Shoes for Women Men Unisex Kids Adults Fashion Sports" + ] + } + } + ] + }, + "intent_template_id": 147, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 452, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Find me the cheapest {{item}} {{condition}} and add one of any size to my shopping cart.", + "intent": "Find me the cheapest shoe with marine life printed on it and add one of any size to my shopping cart.", + "instantiation_dict": { + "item": "shoe", + "condition": "with marine life printed on it" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "MAPOLO Animals Words Men's Boys Casual Walking Shoes Sneaker Lightweight Stylish Athletic Tennis Sports Running Shoes for Outdoor Hiking Travel Driving" + ] + } + } + ] + 
}, + "intent_template_id": 147, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 453, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sweetshopz-beer-irish-wallpaper-st-patrick-day-active-running-walking-sneakers-shoes-for-women-men-unisex-kids-adults-fashion-sports-black.html |AND| __SHOPPING__/clothing-shoes-jewelry/men/shoes.html?p=4&product_list_order=price", + "geolocation": null, + "intent_template": "Find me {{item}}. Add one (in any size) to my cart.", + "intent": "Find me a product in the Shoes tab that has the closest color to the sneakers in the other tab. Add one (in any size) to my cart.", + "instantiation_dict": { + "item": "a product in the Shoes tab that has the closest color to the sneakers in the other tab" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "HUTSENDOX Pillow Slippers Slides for Women Men House Cloud Slipper Shower Slide Sandals Anti-Slip Summer Slippers for Indoor Outdoor", + "Green" + ] + } + } + ] + }, + "intent_template_id": 148, + "reasoning_difficulty": "medium", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 454, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/men-s-flip-flop-sandals-comfortable-slip-on-water-shoes-non-slip-flat-thong-sandals.html", + "geolocation": null, + "intent_template": "Find me the cheapest {{item}} {{condition}} and add one of any size to my shopping cart.", + "intent": "Find me the cheapest fruit between $40 and $60 that looks like these shoes and add one of any size to my 
shopping cart.", + "instantiation_dict": { + "item": "fruit", + "condition": "between $40 and $60 that looks like these shoes" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Small Hass Avocado" + ] + } + } + ] + }, + "intent_template_id": 147, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 455, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/xue-juan-1983-feng-shui-decoration-modern-bubble-girl-sculpture-decoration-home-tv-cabinet-character-statue-decor-ornament-study-office-desktop-bust-character-resin-statue-ornaments-desktop-ornaments.html", + "geolocation": null, + "intent_template": "Find {{item}} {{condition}}. Add it to my shopping cart.", + "intent": "Find a wig between $50 and $100 that best matches this sculpture's hair. 
Add it to my shopping cart.", + "instantiation_dict": { + "item": "a wig", + "condition": "between $50 and $100 that best matches this sculpture's hair" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Gwen Wig Color 34 Dark Brown with Gray - Foxy Silver Wigs Short Feathered Cut Wispy Bangs Human Hair African American Womens Lightweight Average Cap Bundle w/MaxWigs Hairloss Booklet", + "Gray" + ] + } + } + ] + }, + "intent_template_id": 149, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 456, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/wapbeng-white-european-and-american-wig-ladies-short-curly-hair-wig-42cm-womens-wigs-lovely-fashion-curly-cosplay-wigs.html", + "geolocation": null, + "intent_template": "Add to my cart {{item}} {{condition}}.", + "intent": "Add to my cart a wig that looks most like this but with longer hair that is between $50 and $100.", + "instantiation_dict": { + "item": "a wig that looks most like this", + "condition": "but with longer hair that is between $50 and $100" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Wavy Hair Wigs - Wigs Synthetic Wigs,for Art Photo Shoot Wig Props and Cosplay Costume Full Wig 100% density (Silver)" + ] + } + } + ] + }, + "intent_template_id": 150, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 
457, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/nachic-wall-3-piece-wall-art-for-bedroom-black-and-white-teal-rose-canvas-wall-art-still-life-flower-paintings-giclee-print-contemporary-bathroom-wall-decor-framed-ready-to-hang.html", + "geolocation": null, + "intent_template": "Add to my cart {{item}} {{condition}}.", + "intent": "Add to my cart a poster set that looks like this but is pink and not more than $7 more expensive.", + "instantiation_dict": { + "item": "a poster set that looks like this", + "condition": "but is pink and not more than $7 more expensive" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "3 Piece Vintage Canvas Wall Art Pink Peony Flowers Bouquet with White Vase on Books Pictures Romantic Floral Painting Prints Gift Framed for Home Bathroom Bedroom Wall Decor 12\" x 16\" x 3 Panels" + ] + } + } + ] + }, + "intent_template_id": 150, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 458, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/kokopelli-the-ancient-ones-kokopelli-southwest-cotton-woven-blanket-throw-made-in-the-usa-72x54.html", + "geolocation": null, + "intent_template": "Add to my cart {{item}} {{condition}}.", + "intent": "Add to my cart all blankets (including this one) with the same two people holding up the blanket in the photo.", + "instantiation_dict": { + "item": "all blankets (including this one)", + "condition": "with the same two people holding up the blanket in the photo" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + 
"reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Aracnafaria - Anne Stokes Gothic Collection - Cotton Woven Blanket Throw - Made in The USA (72x54)", + "Heaven's Gate 1 - and God Will Open Wide The Gates of Heaven - Scriptures - 2 Peter 1:11 - Sympathy - Cotton Woven Blanket Throw - Made in The USA (72x54)", + "Jesus Footprints in The Sand - Sympathy - Cotton Woven Blanket Throw - Made in The USA (72x54)", + "Kokopelli The Ancient Ones - Kokopelli - Southwest - Cotton Woven Blanket Throw - Made in The USA (72x54)", + "Russet and Green - Southwest Native American Inspired Tribal Camp - Cotton Woven Blanket Throw - Made in The USA (72x54)", + "US Marine Corps - Land Sea Air - Cotton Woven Blanket Throw - Made in The USA (72x54)" + ] + } + } + ] + }, + "intent_template_id": 150, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "Can be solved by searching for 'Pure Country Weavers blankets'" + }, + { + "sites": [ + "shopping" + ], + "task_id": 459, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/butterfly-dance-helen-vladykina-cotton-woven-blanket-throw-made-in-the-usa-72x54.html", + "geolocation": null, + "intent_template": "Add to my cart {{item}} {{condition}}.", + "intent": "Add to my cart all blankets (including this one) with the same two people holding up the blanket in the photo.", + "instantiation_dict": { + "item": "all blankets (including this one)", + "condition": "with the same two people holding up the blanket in the photo" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Frida Kahlo - Beautiful and Strong - Cotton Woven Blanket 
Throw - Made in The USA (72x54)", + "Hawk - Animal Spirits Totem - Sue Coccia - Cotton Woven Blanket Throw - Made in The USA (72x54)", + "Sloth Wanna Hang - Victoria Borges - Cotton Woven Blanket Throw - Made in The USA (72x54)", + "Butterfly Dance - Helen Vladykina - Cotton Woven Blanket Throw - Made in The USA (72x54)" + ] + } + } + ] + }, + "intent_template_id": 150, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "Can be solved by searching for 'Pure Country Weavers blankets'" + }, + { + "sites": [ + "shopping" + ], + "task_id": 460, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/vinyl-skin-sticker-for-playstation-4-pro-blue-fire-ps4-pro-console-and-controllers-skins-vinyl-sticker-decal-cover.html", + "geolocation": null, + "intent_template": "I've been thinking about ordering {{item}}, but {{condition}}. Can you add it to my shopping cart if it exists?", + "intent": "I've been thinking about ordering an Xbox One controller shell, but one that has a matte finish. 
Can you add it to my shopping cart if it exists?", + "instantiation_dict": { + "item": "an Xbox One controller shell", + "condition": "one that has a matte finish" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "eXtremeRate Blue Flame Faceplate Cover, Soft Touch Front Housing Shell Case, Comfortable Soft Grip Replacement Kit for Microsoft Xbox One X & One S Controller Model 1708 - Controller NOT Included" + ] + } + } + ] + }, + "intent_template_id": 59, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 461, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add to my cart {{item}} {{condition}}.", + "intent": "Add to my cart a wig that costs between $50 and $100 and can help me cosplay the character in the image.", + "image": "https://images.pexels.com/photos/13352513/pexels-photo-13352513.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1", + "instantiation_dict": { + "item": "a wig that costs between $50 and $100", + "condition": "and can help me cosplay the character in the image" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Anime Vocaloid Hatsune Miku Synthetic Wig Costume Game Play Wigs Cosplay miku Long wavy blue Hair Wigs + wig cap One Size PL-823" + ] + } + } + ] + }, + "intent_template_id": 150, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + 
"sites": [ + "shopping" + ], + "task_id": 462, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/noldares-womens-heeled-sandals-fashion-squared-toe-backless-indoor-sandals-mules-stiletto-high-heels-slip-on-heeled-slippers.html", + "geolocation": null, + "intent_template": "Find {{item}} {{condition}}. Add it to my shopping cart.", + "intent": "Find a zebra-striped version of these heels from the same brand with the same pyramid shaped base. Add it to my shopping cart.", + "instantiation_dict": { + "item": "a zebra-striped version of these heels from the same brand", + "condition": "with the same pyramid shaped base" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "NOLDARES Heels for Women Heeled Sandals Slip-On Casual Square Toe Slippers Sandals Fashion Stilettos Heeled Party Single Pumps" + ] + } + } + ] + }, + "intent_template_id": 149, + "reasoning_difficulty": "hard", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 463, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/motivational-posters-for-classroom-office-decorations-inspirational-quote-wall-art-for-teachers-students-school-counselors-home-office-set-of-10-creative-chalkboard-designs.html", + "geolocation": null, + "intent_template": "Add this (in any size) to my cart if {{condition}}", + "intent": "Add this (in any size) to my cart if the at least one of the posters has the word \"inspire\" printed on it", + "instantiation_dict": { + "condition": "the at least one of the posters has the word \"inspire\" printed on it" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + 
"reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_exclude": [ + "Motivational Posters for Classroom & Office Decorations | Inspirational Quote Wall Art for Teachers, Students, School Counselors, Home & Office | Set of 10 Creative Chalkboard Designs" + ] + } + } + ] + }, + "intent_template_id": 151, + "reasoning_difficulty": "easy", + "visual_difficulty": "hard", + "overall_difficulty": "hard", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 464, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/love-live-quote-art-wall-canvas-painting-dream-art-picture-black-white-print-and-poster-for-living-room-home-decor-70x100cm-3pcs-frameless.html", + "geolocation": null, + "intent_template": "Add this (in any size) to my cart if {{condition}}", + "intent": "Add this (in any size) to my cart if the at least one of the posters has the word \"love\" printed on it", + "instantiation_dict": { + "condition": "the at least one of the posters has the word \"love\" printed on it" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Love Live Quote Art Wall Canvas Painting Dream Art Picture Black White Print And Poster For Living Room Home Decor (70x100cm\u00d73pcs) Frameless" + ] + } + } + ] + }, + "intent_template_id": 151, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + }, + { + "sites": [ + "shopping" + ], + "task_id": 465, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/ihappywall-3-pieces-bathroom-canvas-wall-art-teal-style-bathtub-bath-set-towel-relax-soothe-unwind-bathroom-still-life-picture-poster-print-on-canvas-stretched-and-framed-ready-to-hang.html", + "geolocation": null, + "intent_template": "Add this (in any size) to my cart if {{condition}}", + "intent": "Add this (in any size) to my cart if the at least one of the posters has the word \"happy\" printed on it", + "instantiation_dict": { + "condition": "the at least one of the posters has the word \"happy\" printed on it" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_exclude": [ + "iHAPPYWALL 3 Pieces Bathroom Canvas Wall Art Teal Style Bathtub Bath Set Towel Relax Soothe Unwind Bathroom Still Life Picture Poster Print On Canvas Stretched and Framed Ready To Hang" + ] + } + } + ] + }, + "intent_template_id": 151, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "overall_difficulty": "medium", + "comments": "" + } +] \ No newline at end of file diff --git a/VAB-WebArena-Lite/config_files/wa/test_webarena.json b/VAB-WebArena-Lite/config_files/wa/test_webarena.json new file mode 100644 index 0000000..d7cbeb9 --- /dev/null +++ b/VAB-WebArena-Lite/config_files/wa/test_webarena.json @@ -0,0 +1,29604 @@ +[ + { + "sites": [ + "shopping_admin" + ], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What is the top-{{n}} best-selling product in {{year}}", + "instantiation_dict": { + "n": 1, + "year": 2022 + }, + "intent": "What is the top-1 best-selling product in 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Quest Lumaflex\u2122 
Band" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Quest Lumaflex\u2122 Band" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 1, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What is the top-{{n}} best-selling brand in {{period}}", + "instantiation_dict": { + "n": 1, + "period": "Quarter 1 2022" + }, + "intent": "What is the top-1 best-selling brand in Quarter 1 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Sprite" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sprite" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 2, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What is the top-{{n}} best-selling product type in {{period}}", + "instantiation_dict": { + "n": 1, + "period": "Quarter 1 2022" + }, + "intent": "What is the top-1 best-selling product type in Quarter 1 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Yoga ball" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yoga ball" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 3, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What are the top-{{n}} best-selling product in {{year}}", + "instantiation_dict": { + "n": 2, + "year": 2022 + }, + "intent": "What are the top-2 
best-selling product in 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Quest Lumaflex\u2122 Band", + "Sprite Stasis Ball 65 cm" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Quest Lumaflex\u2122 Band, Sprite Stasis Ball 65 cm" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 4, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What are the top-{{n}} best-selling product in {{period}}", + "instantiation_dict": { + "n": 3, + "period": "Jan 2023" + }, + "intent": "What are the top-3 best-selling product in Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Impulse Duffle", + "Overnight Duffle", + "Hawkeye Yoga Short-32-Blue" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Impulse Duffle, Overnight Duffle, Hawkeye Yoga Short-32-Blue" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 5, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What is the top-{{n}} best-selling product type in {{period}}", + "instantiation_dict": { + "n": 1, + "period": "Jan 2023" + }, + "intent": "What is the top-1 best-selling product type in Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Duffle" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Duffle" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + 
], + "task_id": 6, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What are the top-{{n}} best-selling product in {{year}}", + "instantiation_dict": { + "n": 5, + "year": 2023 + }, + "intent": "What are the top-5 best-selling product in 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Sprite Yoga Strap 6 foot", + "Overnight Duffle", + "Ida Workout Parachute Pant-29-Purple", + "Hawkeye Yoga Short-32-Blue", + "Sprite Stasis Ball 65 cm" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sprite Yoga Strap 6 foot, Overnight Duffle, Ida Workout Parachute Pant-29-Purple, Hawkeye Yoga Short-32-Blue, Sprite Stasis Ball 65 cm" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "map" + ], + "task_id": 7, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}", + "instantiation_dict": { + "airport_type": "international airports", + "start": "Carnegie Mellon University", + "radius": "50 km" + }, + "intent": "Tell me the full address of all international airports that are within a driving distance of 50 km to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Pittsburgh International Airport, Southern Beltway, Findlay Township, Allegheny County, 15231, United States" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Pittsburgh International Airport People Movers, Airport Boulevard, Findlay Township, Allegheny County, Pennsylvania, 15231, United States" + }, 
+ "intent_template_id": 79 + }, + { + "sites": [ + "map" + ], + "task_id": 8, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}", + "instantiation_dict": { + "airport_type": "international airports", + "start": "Carnegie Mellon University", + "radius": "5 km" + }, + "intent": "Tell me the full address of all international airports that are within a driving distance of 5 km to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "There is no airport within 5 km of Carnegie Mellon University" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "There is no airport within 5 km of Carnegie Mellon University" + }, + "intent_template_id": 79 + }, + { + "sites": [ + "map" + ], + "task_id": 9, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}", + "instantiation_dict": { + "airport_type": "international airports", + "start": "Carnegie Art Museum", + "radius": "30 km" + }, + "intent": "Tell me the full address of all international airports that are within a driving distance of 30 km to Carnegie Art Museum", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Pittsburgh International Airport, Southern Beltway, Findlay Township, Allegheny County, 15231, United States" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Pittsburgh International Airport People Movers, Airport Boulevard, 
Findlay Township, Allegheny County, Pennsylvania, 15231, United States" + }, + "intent_template_id": 79 + }, + { + "sites": [ + "map" + ], + "task_id": 10, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}", + "instantiation_dict": { + "airport_type": "US international airports", + "start": "Niagara Falls", + "radius": "60 km" + }, + "intent": "Tell me the full address of all US international airports that are within a driving distance of 60 km to Niagara Falls", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Niagara Falls International Airport, 2035, Niagara Falls Boulevard, City of Niagara Falls, Town of Wheatfield, Niagara County, New York, 14304, United States", + "Buffalo-Niagara International Airport, Holtz Drive, Town of Cheektowaga, Erie County, New York, 14225, United States" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Niagara Falls International Airport, 2035, Niagara Falls Boulevard, City of Niagara Falls, Town of Wheatfield, Niagara County, New York, 14304, United States Buffalo-Niagara International Airport, South Youngs Road, Town of Cheektowaga, Erie County, New York, 14221, United States" + }, + "intent_template_id": 79 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 11, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "disappointed" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term 
\"disappointed\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "6" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "6" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 12, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "satisfied" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"satisfied\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 13, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "decent" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"decent\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 14, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + 
"start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "not useful" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"not useful\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 15, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "best" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"best\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "map" + ], + "task_id": 16, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Compare the time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "5000 Fifth Avenue, Pittsburgh", + "end": "UPMC family health center" + }, + "intent": "Compare the time for walking and driving route from 5000 Fifth Avenue, Pittsburgh to UPMC family health center", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + 
"reference_answers": { + "fuzzy_match": [ + "driving: 2min", + "walking: 16min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Driving: 2min. Walking: 16min." + }, + "intent_template_id": 73 + }, + { + "sites": [ + "map" + ], + "task_id": 17, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Compare the time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "AMC Waterfront", + "end": "Carnegie Mellon University" + }, + "intent": "Compare the time for walking and driving route from AMC Waterfront to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 13min", + "walking: 1h 35min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "driving: 13min, walking: 1h 35min." + }, + "intent_template_id": 73 + }, + { + "sites": [ + "map" + ], + "task_id": 18, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Compare the time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "AMC Waterfront", + "end": "Univ of Pittsburgh" + }, + "intent": "Compare the time for walking and driving route from AMC Waterfront to Univ of Pittsburgh", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 15min", + "walking: 1h 47min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "driving: 15min, walking: 1h 47min." 
+ }, + "intent_template_id": 73 + }, + { + "sites": [ + "map" + ], + "task_id": 19, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Compare the time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "Carnegie Science Center", + "end": "Carnegie Mellon University" + }, + "intent": "Compare the time for walking and driving route from Carnegie Science Center to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 12min", + "walking: 1h 44min." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "driving: 12min, walking: 1h 44min." + }, + "intent_template_id": 73 + }, + { + "sites": [ + "map" + ], + "task_id": 20, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Compare the difference in time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "Randyland", + "end": "Carnegie Mellon University" + }, + "intent": "Compare the difference in time for walking and driving route from Randyland to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 13min", + "walking: 1h 45min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "driving: 13min, walking: 1h 45min." 
+ }, + "intent_template_id": 73 + }, + { + "sites": [ + "shopping" + ], + "task_id": 21, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/6s-wireless-headphones-over-ear-noise-canceling-hi-fi-bass-foldable-stereo-wireless-kid-headsets-earbuds-with-built-in-mic-micro-sd-tf-fm-for-iphone-samsung-ipad-pc-black-gold.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "ear cups being small" + }, + "intent": "List out reviewers, if exist, who mention about ear cups being small", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Joseph Brzezinski", + "Catso", + "Dibbins", + "Anglebert Dinkherhump", + "Michelle Davis" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Joseph Brzezinski, Catso, Dibbins, Anglebert Dinkherhump, Michelle Davis" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 22, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/fujifilm-finepix-z200fd-10mp-digital-camera-with-5x-optical-dual-image-stabilized-zoom-black.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "under water photo" + }, + "intent": "List out reviewers, if exist, who mention about under water photo", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no review about under water photo", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 23, + 
"require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/3-pack-samsung-galaxy-s6-screen-protector-nearpow-tempered-glass-screen-protector-with-9h-hardness-crystal-clear-easy-bubble-free-installation-scratch-resist.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "good fingerprint resistant" + }, + "intent": "List out reviewers, if exist, who mention about good fingerprint resistant", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Rachel", + "T. Gannon" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Rachel, T. Gannon, " + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 24, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/haflinger-men-s-wool-felt-open-back-slippers-beige-550-peat-us-7.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "price being unfair" + }, + "intent": "List out reviewers, if exist, who mention about price being unfair", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no reivew about price being unfair", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 25, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/epson-workforce-wf-3620-wifi-direct-all-in-one-color-inkjet-printer-copier-scanner-amazon-dash-replenishment-ready.html", + 
"geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "average print quality" + }, + "intent": "List out reviewers, if exist, who mention about average print quality", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Goldfish", + "Roxanne Brandon Coffey" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "GoldfishGoldfish, Roxanne Brandon Coffey" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 26, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/epson-workforce-wf-3620-wifi-direct-all-in-one-color-inkjet-printer-copier-scanner-amazon-dash-replenishment-ready.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "complain of the customer service" + }, + "intent": "List out reviewers, if exist, who mention about complain of the customer service", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Bob in Vegas", + "RemyR" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Bob in Vegas, RemyRRemyR" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "reddit" + ], + "task_id": 27, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "Showerthoughts" + }, + "intent": "Tell me the count of comments that have received 
more downvotes than upvotes for the user who made the latest post on the Showerthoughts forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "reddit" + ], + "task_id": 28, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "Worcester" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the Worcester forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "reddit" + ], + "task_id": 29, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "DIY" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the DIY forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + 
"reference_answer_raw_annotation": "1" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "reddit" + ], + "task_id": 30, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "space" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the space forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "reddit" + ], + "task_id": 31, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "photoshopbattles" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the photoshopbattles forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "map" + ], + "task_id": 32, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I will arrive {{place}} soon. 
Provide the name of a {{target1}} in the vicinity, if available. Then, tell me the {{information}} to {{target2}} from the hotel.", + "instantiation_dict": { + "place": "Pittsburgh Airport", + "information": "the walking distance", + "target1": "Hilton hotel", + "target2": "the nearest supermarket own by a local company" + }, + "intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hilton hotel in the vicinity, if available. Then, tell me the the walking distance to the nearest supermarket own by a local company from the hotel.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "DoubleTree by Hilton Hotel Pittsburgh Airport", + "2.0km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "DoubleTree by Hilton Hotel Pittsburgh Airport Distance: 2.0km" + }, + "intent_template_id": 78 + }, + { + "sites": [ + "map" + ], + "task_id": 33, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I will arrive {{place}} soon. Provide the name of a {{target1}} in the vicinity, if available. Then, tell me the {{information}} to {{target2}} from the hotel.", + "instantiation_dict": { + "place": "Pittsburgh Airport", + "target1": "Hilton hotel", + "information": "the shortest walking distance", + "target2": "a supermarket" + }, + "intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hilton hotel in the vicinity, if available. 
Then, tell me the the shortest walking distance to a supermarket from the hotel.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "DoubleTree by Hilton Hotel Pittsburgh Airport", + "1.4km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "DoubleTree by Hilton Hotel Pittsburgh Airport Distance: 1.4km" + }, + "intent_template_id": 78 + }, + { + "sites": [ + "map" + ], + "task_id": 34, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I will arrive {{place}} soon. Provide the name of a {{target1}} in the vicinity, if available. Then, tell me the {{information}} to {{target2}} from the hotel.", + "instantiation_dict": { + "place": "Pittsburgh Airport", + "target1": "Hyatt hotel", + "information": "the shortest walking time", + "target2": "a supermarket" + }, + "intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hyatt hotel in the vicinity, if available. Then, tell me the the shortest walking time to a supermarket from the hotel.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hyatt Regency Pittsburgh International Airport" + ], + "fuzzy_match": [ + "Time: 3h 30min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hyatt Regency Pittsburgh International Airport\n3:30" + }, + "intent_template_id": 78 + }, + { + "sites": [ + "map" + ], + "task_id": 35, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I will arrive {{place}} soon. Provide the name of a {{target1}} in the vicinity, if available. 
Then, tell me the {{information}} to {{target2}} from the hotel.", + "instantiation_dict": { + "place": "Pittsburgh Airport", + "target1": "Hyatt hotel", + "information": "the minimal driving time", + "target2": "a supermarket" + }, + "intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hyatt hotel in the vicinity, if available. Then, tell me the the minimal driving time to a supermarket from the hotel.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hyatt Regency Pittsburgh International Airport" + ], + "fuzzy_match": [ + "Time: 15min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hyatt Regency Pittsburgh International Airport Time: 15min" + }, + "intent_template_id": 78 + }, + { + "sites": [ + "map" + ], + "task_id": 36, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "social security administration", + "location": "Carnegie Mellon University" + }, + "intent": "Check if the social security administration in pittsburgh can be reached in one hour by car from Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": ["Yes"] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "map" + ], + "task_id": 37, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "police 
station", + "location": "gates building at CMU" + }, + "intent": "Check if the police station in pittsburgh can be reached in one hour by car from gates building at CMU", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": ["Yes"] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "map" + ], + "task_id": 38, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "duquesne university", + "location": "pittsburgh airport" + }, + "intent": "Check if the duquesne university in pittsburgh can be reached in one hour by car from pittsburgh airport", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": ["Yes"] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "map" + ], + "task_id": 39, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "walmart", + "location": "5600 fifth avenue" + }, + "intent": "Check if the walmart in pittsburgh can be reached in one hour by car from 5600 fifth avenue", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": ["Yes"] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + 
"sites": [ + "map" + ], + "task_id": 40, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "amc theatre", + "location": "hobart street" + }, + "intent": "Check if the amc theatre in pittsburgh can be reached in one hour by car from hobart street", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": ["Yes"] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 41, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "List the top {{n}} search terms in my store", + "instantiation_dict": { + "n": "1" + }, + "intent": "List the top 1 search terms in my store", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "hollister" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "hollister" + }, + "intent_template_id": 285 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 42, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "List the top {{n}} search terms in my store", + "instantiation_dict": { + "n": "2" + }, + "intent": "List the top 2 search terms in my store", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "hollister", + "Joust Bag" + ] + }, + "reference_url": "", + "program_html": [], + 
"string_note": "", + "reference_answer_raw_annotation": "hollister, Joust Bag" + }, + "intent_template_id": 285 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 43, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "List the top {{n}} search terms in my store", + "instantiation_dict": { + "n": "3" + }, + "intent": "List the top 3 search terms in my store", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "hollister", + "Joust Bag", + "Antonia Racer Tank" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "hollister, Joust Bag, Antonia Race Tank" + }, + "intent_template_id": 285 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 44, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Check out my todos", + "instantiation_dict": {}, + "intent": "Check out my todos", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/dashboard/todos", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 303 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 45, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "Check out the most recent open issues", + "instantiation_dict": {}, + "intent": "Check out the most recent open issues", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/issues/?sort=created_asc&state=opened", + "program_html": 
[], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 300 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 46, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/primer/design", + "geolocation": null, + "intent_template": "Check out the most recent open issues", + "instantiation_dict": {}, + "intent": "Check out the most recent open issues", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/primer/design/-/issues/?sort=created_date&state=opened", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 300 + }, + { + "sites": [ + "shopping" + ], + "task_id": 47, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past month" + }, + "intent": "Today is 6/12/2023. Tell me how many fulfilled orders I have over the past month, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "0 order", + "$0 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0 order, $0 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "shopping" + ], + "task_id": 48, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past three days" + }, + "intent": "Today is 6/12/2023. 
Tell me how many fulfilled orders I have over the past three days, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "0 order", + "$0 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0 order, $0 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "shopping" + ], + "task_id": 49, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past four month" + }, + "intent": "Today is 6/12/2023. Tell me how many fulfilled orders I have over the past four month, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3 orders", + "$845.49 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3 orders, $845.49 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "shopping" + ], + "task_id": 50, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past year" + }, + "intent": "Today is 6/12/2023. 
Tell me how many fulfilled orders I have over the past year, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "24 orders", + "$6560.69 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "24 orders, $6560.69 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "shopping" + ], + "task_id": 51, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past six month" + }, + "intent": "Today is 6/12/2023. Tell me how many fulfilled orders I have over the past six month, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "12 orders", + "$1603.69 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "12 orders, $1603.69 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "map" + ], + "task_id": 52, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "Carnegie Mellon University", + "end": "starbucks on Craig Street" + }, + "intent": "How long does it take to walk from Carnegie Mellon University to starbucks on Craig Street?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "7 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": 
"", + "reference_answer_raw_annotation": "7 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 53, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "Univ of Pittsburgh", + "end": "starbucks on Craig Street" + }, + "intent": "How long does it take to walk from Univ of Pittsburgh to starbucks on Craig Street?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "18 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "18 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 54, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "Carnegie Mellon University", + "end": "Univ of Pittsburgh" + }, + "intent": "How long does it take to walk from Carnegie Mellon University to Univ of Pittsburgh?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "25 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "25 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 55, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "the starbuck near CMU", + "end": "Chatham university" + }, + "intent": "How long does it take to walk from the starbuck near CMU to Chatham 
university?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "30 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "30 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 56, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "Carnegie Museum of Art", + "end": "a library at CMU" + }, + "intent": "How long does it take to walk from Carnegie Museum of Art to a library at CMU?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "11 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "11 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 57, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "restaurant", + "place2": "university center at Carnegie Mellon University" + }, + "intent": "Tell me the closest restaurant(s) to university center at Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "El Gallo de Oro", + "Back Bar Grill", + "Grano", + "Beefsteak", + "Nourish", + "Schatz Dining Room", + "Au Bon Pain" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "El Gallo de Oro, Back Bar Grill, Grano, Beefsteak, Nourish, Schatz Dining Room, Au Bon Pain" + }, + "intent_template_id": 69 + }, + { + "sites": [ + 
"map" + ], + "task_id": 58, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "cafe", + "place2": "CMU Hunt library" + }, + "intent": "Tell me the closest cafe(s) to CMU Hunt library", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "De Fer Coffee & Tea" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "De Fer Coffee & Tea" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "map" + ], + "task_id": 59, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "restaurant", + "place2": "CMU Hunt library" + }, + "intent": "Tell me the closest restaurant(s) to CMU Hunt library", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The exchange" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "The exchange" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "map" + ], + "task_id": 60, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "restaurant", + "place2": "CMU Posner Hall" + }, + "intent": "Tell me the closest restaurant(s) to CMU Posner Hall", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The exchange" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + 
"reference_answer_raw_annotation": "The exchange" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "map" + ], + "task_id": 61, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "restaurant", + "place2": "CMU Sorrells Library" + }, + "intent": "Tell me the closest restaurant(s) to CMU Sorrells Library", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "La Prima Espresso" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "La Prima Espresso" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 62, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Which customer has completed the {{quantifier}} number of orders in the entire history?", + "instantiation_dict": { + "quantifier": "most" + }, + "intent": "Which customer has completed the most number of orders in the entire history?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Jane Smith" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Jane Smith" + }, + "intent_template_id": 276 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 63, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Which customer(s) has completed the {{quantifier}} number of orders in the entire history?", + "instantiation_dict": { + "quantifier": "second most" + }, + "intent": "Which customer(s) has 
completed the second most number of orders in the entire history?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Adam Garcia", + "Michael Nguyen", + "Sarah Miller" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Adam Garcia, Michael Nguyen, Sarah Miller" + }, + "intent_template_id": 276 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 64, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Which customer has placed {{number}} orders in the entire history?", + "instantiation_dict": { + "number": "2" + }, + "intent": "Which customer has placed 2 orders in the entire history?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lisa Kim", + "Lisa Green", + "Julia Williams", + "Brian Smith", + "Alexander Thomas" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lisa Kim, Lisa Green, Julia Williams, Brian Smith, Alexander Thomas" + }, + "intent_template_id": 276 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 65, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Which customer has completed the {{quantifier}} number of orders in the entire history?", + "instantiation_dict": { + "quantifier": "fifth most" + }, + "intent": "Which customer has completed the fifth most number of orders in the entire history?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Jane Doe" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + 
"reference_answer_raw_annotation": "Jane Doe" + }, + "intent_template_id": 276 + }, + { + "sites": [ + "reddit" + ], + "task_id": 66, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}", + "instantiation_dict": { + "number": 10, + "subreddit": "books", + "description": "show me the post URLs that recommand a single book" + }, + "intent": "Among the top 10 post in \"books\" forum, show me the post URLs that recommand a single book", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "http://www.reddit.com/f/books/59396/apple-books-has-a-free-audiobook-of-a-christmas-carol", + "http://www.reddit.com/f/books/17445/i-just-finished-reading-the-hobbit-to-my-6-year-old-daughter" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "http://localhost:28080/f/books/59396/apple-books-has-a-free-audiobook-of-a-christmas-carol, http://localhost:28080/f/books/17445/i-just-finished-reading-the-hobbit-to-my-6-year-old-daughter" + }, + "intent_template_id": 17 + }, + { + "sites": [ + "reddit" + ], + "task_id": 67, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}", + "instantiation_dict": { + "number": 10, + "subreddit": "books", + "description": "show me the book names from posts that recommand a single book" + }, + "intent": "Among the top 10 post in \"books\" forum, show me the book names from posts that recommand a single book", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "A Christmas Carol", + "The Hobbit" + 
] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "A Christmas Carol, The Hobbit" + }, + "intent_template_id": 17 + }, + { + "sites": [ + "reddit" + ], + "task_id": 68, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}", + "instantiation_dict": { + "number": 10, + "subreddit": "books", + "description": "show me the author name and the book name from posts that recommand a single book" + }, + "intent": "Among the top 10 post in \"books\" forum, show me the author name and the book name from posts that recommand a single book", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "A Christmas Carol", + "Levar Burton", + "The Hobbit", + "J. R. R. Tolkien" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "A Christmas Carol by Levar Burton: , The Hobbit by J. R. R. Tolkien" + }, + "intent_template_id": 17 + }, + { + "sites": [ + "reddit" + ], + "task_id": 69, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}", + "instantiation_dict": { + "number": 10, + "subreddit": "books", + "description": "is there any post talks about supporting local book stores? If so, tell me the organizations involved" + }, + "intent": "Among the top 10 post in \"books\" forum, is there any post talks about supporting local book stores? 
If so, tell me the organizations involved", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "bookshop.org" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "bookshop.org" + }, + "intent_template_id": 17 + }, + { + "sites": [ + "map" + ], + "task_id": 70, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the zip code of {{place}}?", + "instantiation_dict": { + "place": "Carnegie Mellon University" + }, + "intent": "What is the zip code of Carnegie Mellon University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "15213" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "15213" + }, + "intent_template_id": 70 + }, + { + "sites": [ + "map" + ], + "task_id": 71, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the zip code of {{place}}?", + "instantiation_dict": { + "place": "Chatham University" + }, + "intent": "What is the zip code of Chatham University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "15232" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "15232" + }, + "intent_template_id": 70 + }, + { + "sites": [ + "map" + ], + "task_id": 72, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the zip code of {{place}}?", + "instantiation_dict": { + "place": "Yale University" + }, + "intent": "What is the zip code of Yale University?", + "require_reset": false, + 
"eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "06516" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "06516" + }, + "intent_template_id": 70 + }, + { + "sites": [ + "map" + ], + "task_id": 73, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the zip code of {{place}}?", + "instantiation_dict": { + "place": "Columbia University" + }, + "intent": "What is the zip code of Columbia University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "10027" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "10027" + }, + "intent_template_id": 70 + }, + { + "sites": [ + "map" + ], + "task_id": 74, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Given the following locations, {{place_list}}, what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.", + "instantiation_dict": { + "place_list": [ + "Carnegie Mellon University", + "apple store shadyside", + "starbucks on craig street" + ] + }, + "intent": "Given the following locations, ['Carnegie Mellon University', 'apple store shadyside', 'starbucks on craig street'], what would be the optimal route to travel through them all in order to minimize total travel time? 
Please note the journey begins at the first place listed.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "The order is Carnegie Mellon University, starbucks on forbes ave, apple store shadyside" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Carnegie Mellon University, starbucks on forbes ave, apple store shadyside" + }, + "intent_template_id": 65 + }, + { + "sites": [ + "map" + ], + "task_id": 75, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Given the following locations, {{place_list}}, what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.", + "instantiation_dict": { + "place_list": [ + "Massachusetts Institute of Technology", + "Harvard University", + "Boston Logan International Airport" + ] + }, + "intent": "Given the following locations, ['Massachusetts Institute of Technology', 'Harvard University', 'Boston Logan International Airport'], what would be the optimal route to travel through them all in order to minimize total travel time? 
Please note the journey begins at the first place listed.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "The order is Massachusetts Institute of Technology, Harvard University, Boston Logan International Airport" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Massachusetts Institute of Technology, Harvard University, Boston Logan International Airport" + }, + "intent_template_id": 65 + }, + { + "sites": [ + "map" + ], + "task_id": 76, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Given the following locations, {{place_list}}, what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.", + "instantiation_dict": { + "place_list": [ + "Princeton University", + "Yale University", + "Harvard University" + ] + }, + "intent": "Given the following locations, ['Princeton University', 'Yale University', 'Harvard University'], what would be the optimal route to travel through them all in order to minimize total travel time? 
Please note the journey begins at the first place listed.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The order is Princeton University, Yale University, Harvard University" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Princeton University, Yale University, Harvard University" + }, + "intent_template_id": 65 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 77, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What is the total count of {{status}} reviews amongst all the reviews?", + "instantiation_dict": { + "status": "Pending" + }, + "intent": "What is the total count of Pending reviews amongst all the reviews?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "5" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5" + }, + "intent_template_id": 277 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 78, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What is the total count of {{status}} reviews amongst all the reviews?", + "instantiation_dict": { + "status": "Approved" + }, + "intent": "What is the total count of Approved reviews amongst all the reviews?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "346" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "346" + }, + "intent_template_id": 277 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 79, + "require_login": 
true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What is the total count of {{status}} reviews amongst all the reviews?", + "instantiation_dict": { + "status": "Not Approved" + }, + "intent": "What is the total count of Not Approved reviews amongst all the reviews?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 277 + }, + { + "sites": [ + "map" + ], + "task_id": 80, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?", + "instantiation_dict": { + "place_A": "Carnegie Mellon University", + "place_B": "Starbucks on Craig Street", + "place_C": "Pittsburgh International Airport" + }, + "intent": "What is the duration required to first walk from Carnegie Mellon University to Starbucks on Craig Street, and then drive to Pittsburgh International Airport?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "38 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "38 min" + }, + "intent_template_id": 72 + }, + { + "sites": [ + "map" + ], + "task_id": 81, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?", + "instantiation_dict": { + "place_A": "Univ of Pittsburgh", + "place_B": "starbucks on Craig Street", + "place_C": 
"Pittsburgh International Airport" + }, + "intent": "What is the duration required to first walk from Univ of Pittsburgh to starbucks on Craig Street, and then drive to Pittsburgh International Airport?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "49 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "49 min" + }, + "intent_template_id": 72 + }, + { + "sites": [ + "map" + ], + "task_id": 82, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?", + "instantiation_dict": { + "place_A": "Massachusetts Institute of Technology", + "place_B": "Harvard University", + "place_C": "Boston Logan International Airport" + }, + "intent": "What is the duration required to first walk from Massachusetts Institute of Technology to Harvard University, and then drive to Boston Logan International Airport?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "63 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "63 min" + }, + "intent_template_id": 72 + }, + { + "sites": [ + "map" + ], + "task_id": 83, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?", + "instantiation_dict": { + "place_A": "Carnegie Mellon University", + "place_B": "apple store shadyside", + "place_C": "starbucks on craig street" + }, + "intent": "What is the duration required to first walk from Carnegie Mellon University to apple store 
shadyside, and then drive to starbucks on craig street?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "22 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "22 min" + }, + "intent_template_id": 72 + }, + { + "sites": [ + "map" + ], + "task_id": 84, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "DoubleTree by Hilton New York Downtown", + "place": "Keens Steakhouse" + }, + "intent": "From my stay at DoubleTree by Hilton New York Downtown, what's the estimated driving time to reach Keens Steakhouse?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "14 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "14 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 85, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "La Quinta Inn near the airport", + "place": "Carnegie Mellon University" + }, + "intent": "From my stay at La Quinta Inn near the airport, what's the estimated driving time to reach Carnegie Mellon University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "30 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "30 minutes" + }, + "intent_template_id": 64 + }, + { + 
"sites": [ + "map" + ], + "task_id": 86, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "La Quinta Inn near the airport", + "place": "Upitt" + }, + "intent": "From my stay at La Quinta Inn near the airport, what's the estimated driving time to reach Upitt?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "29 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "29 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 87, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "red roof inn", + "place": "Pittsburgh science museum" + }, + "intent": "From my stay at red roof inn, what's the estimated driving time to reach Pittsburgh science museum?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "20 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "20 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 88, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "Homewood Suites Southpointe", + "place": "PPG Paints Arena" + }, + "intent": "From my stay at Homewood Suites Southpointe, what's 
the estimated driving time to reach PPG Paints Arena?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "34 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "34 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 89, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "Connecticut" + }, + "intent": "Which US states border Connecticut?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Rhode Island", + "Massachusetts", + "New York" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Rhode Island, Massachusetts, New York" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "map" + ], + "task_id": 90, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "Pennsylvania" + }, + "intent": "Which US states border Pennsylvania?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Ohio", + "Maryland", + "New York", + "New Jersey", + "Delaware", + "West Virginia" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Ohio, Maryland, New York, New Jersey, Delaware, West Virginia" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "map" + ], + "task_id": 91, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": 
"Which US states border {{state}}?", + "instantiation_dict": { + "state": "Massachusetts" + }, + "intent": "Which US states border Massachusetts?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Rhode Island", + "Connecticut", + "New York", + "New Hampshire", + "Vermont" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Rhode Island, Connecticut, New York, New Hampshire, Vermont" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "map" + ], + "task_id": 92, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "Vermont" + }, + "intent": "Which US states border Vermont?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "New York", + "New Hampshire", + "Massachusetts" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "New York, New Hampshire, Massachusetts" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "map" + ], + "task_id": 93, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "New Hampshire" + }, + "intent": "Which US states border New Hampshire?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Massachusetts", + "Vermont", + "Maine" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Massachusetts, Vermont, Maine" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 94, + 
"require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Telll me the grand total of invoice {{id}}.", + "instantiation_dict": { + "id": "000000001" + }, + "intent": "Telll me the grand total of invoice 000000001.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "36.39" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$36.39" + }, + "intent_template_id": 274 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 95, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Telll me the grand total of invoice {{id}}.", + "instantiation_dict": { + "id": "000000002" + }, + "intent": "Telll me the grand total of invoice 000000002.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "39.64" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$39.64" + }, + "intent_template_id": 274 + }, + { + "sites": [ + "shopping" + ], + "task_id": 96, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Tell me the status of my latest order and when will it arrive", + "instantiation_dict": {}, + "intent": "Tell me the status of my latest order and when will it arrive", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "The last order was canceled. It will never arrive." + ] + }, + "reference_url": "", + "program_html": [], + "reference_answer_raw_annotation": "The last order was canceled. 
It will never arrive.", + "string_note": "" + }, + "intent_template_id": 193 + }, + { + "sites": [ + "map", + "wikipedia" + ], + "task_id": 97, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the distance to drive from Carnegie Mellon University to the top computer science school in massachusetts", + "instantiation_dict": {}, + "intent": "Tell me the distance to drive from Carnegie Mellon University to the top computer science school in massachusetts", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "914km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "914 km" + }, + "intent_template_id": 120 + }, + { + "sites": [ + "map" + ], + "task_id": 98, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Where is the nearest {{places}} to {{start}}, and what is the walking distance to it?", + "instantiation_dict": { + "places": "tea cafe", + "start": "University of Pittsburgh" + }, + "intent": "Where is the nearest tea cafe to University of Pittsburgh, and what is the walking distance to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Fuku Tea", + "3716", + "Forbes Avenue", + "Central Oakland", + "Pittsburgh", + "653m" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Fuku Tea, 3716, Forbes Avenue, Oakland, Central Oakland, Pittsburgh, Allegheny County, Pennsylvania, 15213, United States\n653m" + }, + "intent_template_id": 66 + }, + { + "sites": [ + "map" + ], + "task_id": 99, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + 
"intent_template": "Where is the nearest {{places}} to {{start}}, and what is the walking distance to it?", + "instantiation_dict": { + "places": "Five Guys", + "start": "5700 Penn Ave" + }, + "intent": "Where is the nearest Five Guys to 5700 Penn Ave, and what is the walking distance to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Five Guys", + "117", + "South Bouquet Street", + "North Oakland", + "Pittsburgh", + "4.0km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Five Guys, 117, South Bouquet Street, Oakland, North Oakland, Pittsburgh, Allegheny County, Pennsylvania, 15213, United States\n4.0km" + }, + "intent_template_id": 66 + }, + { + "sites": [ + "map" + ], + "task_id": 100, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Where is the nearest {{places}} to {{start}}, and what is the walking distance to it?", + "instantiation_dict": { + "places": "Starbucks", + "start": "Carnegie Mellon" + }, + "intent": "Where is the nearest Starbucks to Carnegie Mellon, and what is the walking distance to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Starbucks", + "417", + "South Craig Street", + "Bellefield", + "Pittsburgh", + "557m" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Starbucks, 417, South Craig Street, Bellefield, Pittsburgh, Allegheny County, Pennsylvania, 15213, United States\n557m" + }, + "intent_template_id": 66 + }, + { + "sites": [ + "map" + ], + "task_id": 101, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Where is the nearest {{places}} to {{start}}, and what is 
the walking distance to it?", + "instantiation_dict": { + "places": "In-N-Out", + "start": "Upitts" + }, + "intent": "Where is the nearest In-N-Out to Upitts, and what is the walking distance to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no In-N-Out near University of Pittsburgh", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 66 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 102, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "help needed", + "repo": "a11yproject/a11yproject.com" + }, + "intent": "Display the list of issues in the a11yproject/a11yproject.com repository that have labels related to help needed", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/byteblaze/a11y-syntax-highlighting/-/issues/?label_name%5B%5D=help%20wanted", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 103, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "questions", + "repo": "kkroening/ffmpeg-python" + }, + "intent": "Display the list of issues in the kkroening/ffmpeg-python repository that have labels related to questions", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + 
"reference_url": "http://localhost:28084/kkroening/ffmpeg-python/-/issues/?label_name%5B%5D=question", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 104, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "flaky-test", + "repo": "keycloak/keycloak" + }, + "intent": "Display the list of issues in the keycloak/keycloak repository that have labels related to flaky-test", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/keycloak/keycloak/-/issues/?label_name%5B%5D=flaky-test", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 105, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "OpenAPI Generator CLI", + "repo": "OpenAPITools/openapi-generator" + }, + "intent": "Display the list of issues in the OpenAPITools/openapi-generator repository that have labels related to OpenAPI Generator CLI", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/OpenAPITools/openapi-generator/-/issues/?label_name%5B%5D=OpenAPI%20Generator%20CLI", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 106, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + 
"start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "BUG", + "repo": "umano/AndroidSlidingUpPanel" + }, + "intent": "Display the list of issues in the umano/AndroidSlidingUpPanel repository that have labels related to BUG", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/umano/AndroidSlidingUpPanel/-/issues/?label_name%5B%5D=BUG", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 107, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "from May to December 2022" + }, + "intent": "Presents the monthly count of successful orders from May to December 2022 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "May: 8 orders", + "June: 13 orders", + "July: 9 orders", + "August: 8 orders", + "Sepetember: 10 orders", + "October: 4 orders", + "November: 5 orders", + "December: 10 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "May: 8 orders June: 13 orders July: 9 orders August: 8 orders Sepetember: 10 orders Octorbor: 4 orders November: 5 orders December: 10 orders " + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 108, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + 
"intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "01/2023-05/2023" + }, + "intent": "Presents the monthly count of successful orders 01/2023-05/2023 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January: 12 orders", + "Feburary: 7 orders", + "March: 5 orders", + "April: 9 orders", + "May: 5 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January: 12 orders Febulary: 7 orders March: 5 orders Apirl: 9 orders May: 5 orders" + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 109, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "from Jan to December 2022" + }, + "intent": "Presents the monthly count of successful orders from Jan to December 2022 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January: 11 orders", + "Feburary: 16 orders", + "March: 14 orders", + "April: 7 orders", + "May: 8 orders", + "June: 13 orders", + "July: 9 orders", + "August: 8 orders", + "Sepetember: 10 orders", + "Octorbor: 4 orders", + "November: 5 orders", + "December: 10 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January: 11 orders Feburary: 16 orders March: 14 orders April: 7 orders May: 8 orders June: 13 orders July: 9 orders August: 8 orders Sepetember: 10 orders Octorbor: 4 orders November: 5 orders December: 10 orders " + }, + "intent_template_id": 270 + }, + { + "sites": [ + 
"shopping_admin" + ], + "task_id": 110, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "from Jan to Nov 2022" + }, + "intent": "Presents the monthly count of successful orders from Jan to Nov 2022 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January: 11 orders", + "Feburary: 16 orders", + "March: 14 orders", + "April: 7 orders", + "May: 8 orders", + "June: 13 orders", + "July: 9 orders", + "August: 8 orders", + "Sepetember: 10 orders", + "Octorbor: 4 orders", + "November: 5 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January: 11 orders Feburary: 16 orders March: 14 orders April: 7 orders May: 8 orders June: 13 orders July: 9 orders August: 8 orders Sepetember: 10 orders Octorbor: 4 orders November: 5 orders " + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 111, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "from Feb to Nov 2022" + }, + "intent": "Presents the monthly count of successful orders from Feb to Nov 2022 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Feburary: 16 orders", + "March: 14 orders", + "April: 7 orders", + "May: 8 orders", + "June: 13 orders", + "July: 9 orders", + "August: 8 orders", + "Sepetember: 10 orders", + "Octorbor: 4 orders", + "November: 5 
orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Feburary: 16 orders March: 14 orders April: 7 orders May: 8 orders June: 13 orders July: 9 orders August: 8 orders Sepetember: 10 orders Octorbor: 4 orders November: 5 orders " + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 112, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Show me the customers who have expressed dissatisfaction with {{product}}?", + "instantiation_dict": { + "product": "Circe fleece" + }, + "intent": "Show me the customers who have expressed dissatisfaction with Circe fleece?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Hannah Lim" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hannah Lim" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 113, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Show me the customers who have expressed dissatisfaction with {{product}}?", + "instantiation_dict": { + "product": "Olivia zip jacket" + }, + "intent": "Show me the customers who have expressed dissatisfaction with Olivia zip jacket?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Emma Lopez", + "Seam Miller" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Emma Lopez, Seam Miller" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 114, + "require_login": true, + "storage_state": 
"./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Show me the customers who have expressed dissatisfaction with {{product}}?", + "instantiation_dict": { + "product": "Antonia racer tank" + }, + "intent": "Show me the customers who have expressed dissatisfaction with Antonia racer tank?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Shaunte", + "Merrie" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Shaunte, Merrie" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 115, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Show me the name of the customers who have expressed dissatisfaction with {{product}}", + "instantiation_dict": { + "product": "Chloe tank" + }, + "intent": "Show me the name of the customers who have expressed dissatisfaction with Chloe tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no negative review for Chloe tank", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 116, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Show me the name of the customers who have expressed dissatisfaction with {{product}}?", + "instantiation_dict": { + "product": "tanks products" + }, + "intent": "Show me the name of the customers who have expressed dissatisfaction with tanks products?", + "require_reset": false, + "eval": 
{ + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Alexander", + "Carma", + "Dominic", + "Merrie", + "Monroe", + "Scotty", + "Shaunte", + "Teofila", + "Valorie" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Alexander, Carma, Dominic, Merrie, Monroe, Scotty, Shaunte, Teofila, Valorie" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping" + ], + "task_id": 117, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the date when I made my first purchase on this site?", + "instantiation_dict": {}, + "intent": "What is the date when I made my first purchase on this site?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3/2/22" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3/2/22" + }, + "intent_template_id": 161 + }, + { + "sites": [ + "shopping" + ], + "task_id": 118, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I have jaw bruxism problem, show me something that could alleviate the problem.", + "instantiation_dict": {}, + "intent": "I have jaw bruxism problem, show me something that could alleviate the problem.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "", + "required_contents": { + "must_include": [ + "jaw bruxism", + "mouth guard" + ] + } + } + ] + }, + "intent_template_id": 151 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 119, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": 
"http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Antonia Racer Tank" + }, + "intent": "Tell me the reasons why customers like Antonia Racer Tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Its color and style is good" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Its color and style is good" + }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 120, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Ana Running Short" + }, + "intent": "Tell me the reasons why customers like Ana Running Short", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "It is comfortable" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "It is comfortable" + }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 121, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Circe hooded fleece" + }, + "intent": "Tell me the reasons why customers like Circe hooded fleece", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Warm and comfortable. True to size." 
+ ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Warm and comfortable. True to size." + }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 122, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Olivia zip jacket" + }, + "intent": "Tell me the reasons why customers like Olivia zip jacket", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Lightweight, comfortable and stylish. Good design and details." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lightweight, comfortable, and stylish. Good design and details." + }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 123, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Circe's products" + }, + "intent": "Tell me the reasons why customers like Circe's products", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Warm and comfortable. True to size." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Warm and comfortable. True to size." 
+ }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping" + ], + "task_id": 124, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the price range of {{product}} in the One Stop Market?", + "instantiation_dict": { + "product": "wireless earphone" + }, + "intent": "What is the price range of wireless earphone in the One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0.14", + "745.00" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$0.14 - $745.00" + }, + "intent_template_id": 159 + }, + { + "sites": [ + "shopping" + ], + "task_id": 125, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the price range of {{product}} in the One Stop Market?", + "instantiation_dict": { + "product": "teeth grinding mouth guard" + }, + "intent": "What is the price range of teeth grinding mouth guard in the One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1.46", + "85" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$1.46 - $85" + }, + "intent_template_id": 159 + }, + { + "sites": [ + "shopping" + ], + "task_id": 126, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the price range of {{product}} in the One Stop Market?", + "instantiation_dict": { + "product": "Canon photo printer" + }, + "intent": "What is the price range of Canon photo printer in the One Stop Market?", + "require_reset": false, + "eval": { + 
"eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2.56", + "649.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$2.56 - $649.99" + }, + "intent_template_id": 159 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 127, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What brands appear most frequently among the top search terms?", + "instantiation_dict": {}, + "intent": "What brands appear most frequently among the top search terms?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hollister", + "Joust", + "Antonia" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hollister, Joust, Antonia" + }, + "intent_template_id": 1001 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 128, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What's the total number of items sold in the most recent {{k}} orders?", + "instantiation_dict": { + "k": "2" + }, + "intent": "What's the total number of items sold in the most recent 2 orders?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "9" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "9" + }, + "intent_template_id": 1002 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 129, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What's the total number of items sold 
in the most recent {{k}} orders?", + "instantiation_dict": { + "k": "4" + }, + "intent": "What's the total number of items sold in the most recent 4 orders?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "16" + }, + "intent_template_id": 1002 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 130, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What's the total number of items sold in the most recent {{k}} orders?", + "instantiation_dict": { + "k": "5" + }, + "intent": "What's the total number of items sold in the most recent 5 orders?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "18" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "18" + }, + "intent_template_id": 1002 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 131, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What's the total number of items sold in the most recent {{k}} orders?", + "instantiation_dict": { + "k": "7" + }, + "intent": "What's the total number of items sold in the most recent 7 orders?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "25" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "25" + }, + "intent_template_id": 1002 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 132, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", 
+ "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "kilian", + "repo": "a11yproject", + "date": "3/5/2023" + }, + "intent": "How many commits did kilian make to a11yproject on 3/5/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 133, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "Eric", + "repo": "a11yproject", + "date": "3/2" + }, + "intent": "How many commits did Eric make to a11yproject on 3/2?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 134, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "kilian", + "repo": "a11yproject", + "date": "3/1/2023" + }, + "intent": "How many commits did kilian make to a11yproject on 3/1/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + 
"reference_answer_raw_annotation": "0" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 135, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "Eric and Kilian", + "repo": "a11yproject", + "date": "1/3/2023" + }, + "intent": "How many commits did Eric and Kilian make to a11yproject on 1/3/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 136, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "Steven Woodson", + "repo": "a11y-webring.club", + "date": "2/6/2023" + }, + "intent": "How many commits did Steven Woodson make to a11y-webring.club on 2/6/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "5" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "map" + ], + "task_id": 137, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?", + "instantiation_dict": { + "city1": "the city where the Liberty Bell is located", + "city2": "the home city of Pirates" + }, + "intent": "What is the 
estimated driving time between the city where the Liberty Bell is located and the home city of Pirates?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "5h 47min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5h 47min" + }, + "intent_template_id": 51 + }, + { + "sites": [ + "map" + ], + "task_id": 138, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?", + "instantiation_dict": { + "city1": "the big apple", + "city2": "the city with the most authentic Philly cheesesteaks" + }, + "intent": "What is the estimated driving time between the big apple and the city with the most authentic Philly cheesesteaks?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "1h 58min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1h 58min" + }, + "intent_template_id": 51 + }, + { + "sites": [ + "map" + ], + "task_id": 139, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?", + "instantiation_dict": { + "city1": "the hometown of Joe Biden", + "city2": "Bridgeport" + }, + "intent": "What is the estimated driving time between the hometown of Joe Biden and Bridgeport?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3h 20min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3h 20min" + }, + "intent_template_id": 51 + }, + { + "sites": [ + "map" + ], + 
"task_id": 140, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?", + "instantiation_dict": { + "city1": "the city of Niagara Falls", + "city2": "the city of Yale University" + }, + "intent": "What is the estimated driving time between the city of Niagara Falls and the city of Yale University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "8h 33min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "8h 33min" + }, + "intent_template_id": 51 + }, + { + "sites": [ + "shopping" + ], + "task_id": 141, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "food-related", + "time": "March 2023" + }, + "intent": "How much I spent on food-related shopping during March 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "47.41" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$47.41" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 142, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "hair care and hair style", + "time": "Jan 2023" + }, + "intent": "How much I spent on hair care and hair style shopping during Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + 
"string_match" + ], + "reference_answers": { + "must_include": [ + "95.23" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$95.23" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 143, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "home decoration", + "time": "1/29/2023" + }, + "intent": "How much I spent on home decoration shopping during 1/29/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "265.69" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$265.69" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 144, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "food", + "time": "from mid Jan to the end Jan 2023" + }, + "intent": "How much I spent on food shopping during from mid Jan to the end Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 145, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + 
"instantiation_dict": { + "category": "cooking and food", + "time": "March 2022" + }, + "intent": "How much I spent on cooking and food shopping during March 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "52.35" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$52.35" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 146, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "size", + "product": "picture frame", + "time": "Sep 2022" + }, + "intent": "What is the size configuration of the picture frame I bought Sep 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16x24" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "16x24" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "shopping" + ], + "task_id": 147, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "size", + "product": "picture frame", + "time": "2022" + }, + "intent": "What is the size configuration of the picture frame I bought 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16x24" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "16x24" + }, + "intent_template_id": 155 + }, + { + "sites": 
[ + "shopping" + ], + "task_id": 148, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "color", + "product": "picture frame", + "time": "Sep 2022" + }, + "intent": "What is the color configuration of the picture frame I bought Sep 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Mist" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Mist" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "shopping" + ], + "task_id": 149, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "color", + "product": "artifical plants", + "time": "Feb 2023" + }, + "intent": "What is the color configuration of the artifical plants I bought Feb 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Green-vines" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Green-vines" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "shopping" + ], + "task_id": 150, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "price", + "product": "fake tree", + "time": "Jan 2023" + }, + "intent": "What is the price configuration of the fake tree I 
bought Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "260.69" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "260.69" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "map" + ], + "task_id": 151, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "CMU", + "location2": "University of Pittsburgh" + }, + "intent": "What is the minimum travel time by car from CMU to University of Pittsburgh?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "4min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4min" + }, + "intent_template_id": 36 + }, + { + "sites": [ + "map" + ], + "task_id": 152, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "Schenley park", + "location2": "Upitt" + }, + "intent": "What is the minimum travel time by car from Schenley park to Upitt?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "4min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4min" + }, + "intent_template_id": 36 + }, + { + "sites": [ + "map" + ], + "task_id": 153, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the minimum travel 
time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "REI", + "location2": "CMU" + }, + "intent": "What is the minimum travel time by car from REI to CMU?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "7min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "7min" + }, + "intent_template_id": 36 + }, + { + "sites": [ + "map" + ], + "task_id": 154, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "CMU gates building", + "location2": "Schenley park" + }, + "intent": "What is the minimum travel time by car from CMU gates building to Schenley park?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "4min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4min" + }, + "intent_template_id": 36 + }, + { + "sites": [ + "map" + ], + "task_id": 155, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "Animal Rescue League of Pittsburgh", + "location2": "Schenley park" + }, + "intent": "What is the minimum travel time by car from Animal Rescue League of Pittsburgh to Schenley park?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "9min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "9min" + }, + 
"intent_template_id": 36 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 156, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Checkout merge requests assigned to me", + "instantiation_dict": {}, + "intent": "Checkout merge requests assigned to me", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/dashboard/merge_requests?assignee_username=byteblaze", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 290 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 157, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Show all customers", + "instantiation_dict": {}, + "intent": "Show all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/customer/index/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 255 + }, + { + "sites": [ + "shopping" + ], + "task_id": 158, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 11 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 11 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": 
"http://localhost:28082/heiying-game-card-case-for-nintendo-switch-switch-oled-game-card-or-micro-sd-memory-cards-portable-switch-game-memory-card-storage-with-24-game-card-slots-and-24-micro-sd-card-slots-black.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 159, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 31 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 31 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/game-card-holder-storage-case-for-nintendo-switch-games-or-ps-vita-game-case-or-sd-memory-cards-black.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 160, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 6 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 6 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/heiying-game-card-case-for-nintendo-switch-switch-oled-game-card-or-micro-sd-memory-cards-portable-switch-game-memory-card-storage-with-24-game-card-slots-and-24-micro-sd-card-slots-black.html", + "program_html": [], + 
"url_note": "GOLD in PRED" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 161, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 23 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 23 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/heiying-game-card-case-for-nintendo-switch-switch-oled-game-card-or-micro-sd-memory-cards-portable-switch-game-memory-card-storage-with-24-game-card-slots-and-24-micro-sd-card-slots-black.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 162, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 40 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 40 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/game-card-holder-storage-case-for-nintendo-switch-games-or-ps-vita-game-case-or-sd-memory-cards-black.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 163, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"http://localhost:28082/ostent-16gb-memory-card-stick-storage-for-sony-ps-vita-psv1000-2000-pch-z081-z161-z321-z641.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "I ordered the 16gb but I only got 14 gigs even though I formatted the card", + "The memory card is kind of slow on games and downloads", + "No original packaging It's used and the previous owners data has not been erased", + "The product is a legit sony hardware that have been owned by someone else before", + "The media could not be loaded", + "I could not format the card so I wasn\u2019t able to use it for my VITA" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "I ordered the 16gb but I only got 14 gigs even though I formatted the card. The memory card is kind of slow on games and downloads. No original packaging It's used and the previous owners data has not been erased. The product is a legit sony hardware that have been owned by someone else before The media could not be loaded. I could not format the card so I wasn\u2019t able to use it for my VITA" + }, + "intent_template_id": 136 + }, + { + "sites": [ + "shopping" + ], + "task_id": 164, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/mineralogie-all-natural-lip-gloss-ruby-rose.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? 
Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Dry", + "Uneven color" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "DryUneven color" + }, + "intent_template_id": 136 + }, + { + "sites": [ + "shopping" + ], + "task_id": 165, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/sandgrens-swedish-handmade-wooden-clog-sandal-copenhagen.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The 39 was too small. I am afraid the 40 will be too big", + "I was very sad when the shoe rubbed up against my baby toe", + "I had to return them because I knew in time it would tear up my feet", + "The problem is that the strap is made of some really stiff leather and is painful to my heel", + "The front is also uncomfortably tight", + "The Dansko's were similar (not as bad) and loosened up over time" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "The 39 was too small. I am afraid the 40 will be too big. I was very sad when the shoe rubbed up against my baby toe. I had to return them because I knew in time it would tear up my feet. The problem is that the strap is made of some really stiff leather and is painful to my heel. The front is also uncomfortably tight. The Dansko's were similar (not as bad) and loosened up over time." 
+ }, + "intent_template_id": 136 + }, + { + "sites": [ + "shopping" + ], + "task_id": 166, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/sensodyne-repair-protect-whitening-toothpaste-with-fluoride-3-4-oz-pack-of-3.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "there is no existing criticism", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 136 + }, + { + "sites": [ + "shopping" + ], + "task_id": 167, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/photosmart-plus-b209-clr-inkjetfb-p-s-c-usb-wrls-1.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The wireless connection works on a whim (about 40% of the time I've owned it)", + "It seems to constantly run out of ink", + "Cartridge prices are less than some printers I've had", + "This printer seems to have more reasons NOT to work (none that are findable or correctable) Ex: error boxes saying that it's out of paper when it automatically switches to photo printing for some reason", + "Scanner is as slow as my first scanner I ever owned in the mid-90's", + "For the $176 I paid, there isn't even a fax component on it. 
I guess the \"PLUS\" part of it's name is in reference to the migraines it causes when you can't figure out the new reason why it's not working for the 10th time in the past 2 months." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "The wireless connection works on a whim (about 40% of the time I've owned it). It seems to constantly run out of ink. Cartridge prices are less than some printers I've had, but now I understand why. This printer seems to have more reasons NOT to work (none that are findable or correctable) Ex: error boxes saying that it's out of paper when it automatically switches to photo printing for some reason. Scanner is as slow as my first scanner I ever owned in the mid-90's. For the $176 I paid, there isn't even a fax component on it. I guess the \"PLUS\" part of it's name is in reference to the migraines it causes when you can't figure out the new reason why it's not working for the 10th time in the past 2 months." 
+ }, + "intent_template_id": 136 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 168, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "more than 100" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got more than 100 stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "No repo found", + "reference_answer_raw_annotation": "No repo found" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 169, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "the most" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got the most stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "a11yproject.com", + "design" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "a11yproject.com, Primer/design" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 170, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + 
"instantiation_dict": { + "description": "the least" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got the least stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "cloud-to-butt", + "dotfiles", + "timeit", + "solarized-prism-theme", + "gimmiethat.space", + "remove-board-movement-events-from-the-github-issue-timeline" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "cloud-to-butt, dotfiles, timeit, solarized-prism-theme, gimmiethat.space, remove-board-movement-events-from-the-github-issue-timeline" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 171, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "less than 5" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got less than 5 stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "a11y-syntax-highlighting", + "a11y-webring.club", + "accessible-html-content-patterns", + "ericwbailey.website", + "cloud-to-butt", + "dotfiles", + "timeit", + "solarized-prism-theme", + "gimmiethat.space", + "remove-board-movement-events-from-the-github-issue-timeline" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "a11y-syntax-highlighting, a11y-webring.club, accessible-html-content-patterns, ericwbailey.website, cloud-to-butt, dotfiles, timeit, solarized-prism-theme, gimmiethat.space, remove-board-movement-events-from-the-github-issue-timeline" + }, + 
"intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 172, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "no" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got no stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "cloud-to-butt", + "dotfiles", + "timeit", + "solarized-prism-theme", + "gimmiethat.space", + "remove-board-movement-events-from-the-github-issue-timeline" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "cloud-to-butt, dotfiles, timeit, solarized-prism-theme, gimmiethat.space, remove-board-movement-events-from-the-github-issue-timeline" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 173, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "better" + }, + "intent": "Open my latest updated issue that has keyword \"better\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "fuzzy_match": ["No, it is open"] + }, + "reference_url": "http://localhost:28084/byteblaze/empathy-prompts/-/issues/8", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "", + "url_note": "GOLD in PRED" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 
174, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "feature" + }, + "intent": "Open my latest updated issue that has keyword \"feature\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "fuzzy_match": ["No, it is open"] + }, + "reference_url": "http://localhost:28084/byteblaze/a11y-webring.club/-/issues/71", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 175, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "dependency" + }, + "intent": "Open my latest updated issue that has keyword \"dependency\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "fuzzy_match": ["No, it is open"] + }, + "reference_url": "http://localhost:28084/byteblaze/empathy-prompts/-/issues/18", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 176, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { 
+ "keyword": "theme editor" + }, + "intent": "Open my latest updated issue that has keyword \"theme editor\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "fuzzy_match": ["No, it is open"] + }, + "reference_url": "http://localhost:28084/byteblaze/a11y-syntax-highlighting/-/issues/1", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 177, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "homepage content" + }, + "intent": "Open my latest updated issue that has keyword \"homepage content\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "fuzzy_match": ["Yes, it is closed"] + }, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/issues/719", + "program_html": [], + "reference_answer_raw_annotation": "closed", + "string_note": "" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 178, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "better" + }, + "intent": "Open my latest created issue that has better in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + 
"reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/issues/566", + "program_html": [], + "reference_answer_raw_annotation": "Closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 179, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "feature" + }, + "intent": "Open my latest created issue that has feature in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/issues/1517", + "program_html": [], + "reference_answer_raw_annotation": "Closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 180, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "dependency" + }, + "intent": "Open my latest created issue that has dependency in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "No" + }, + "reference_url": "http://localhost:28084/byteblaze/empathy-prompts/-/issues/18", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 181, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": 
"http://localhost:28084", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "theme editor" + }, + "intent": "Open my latest created issue that has theme editor in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "No" + }, + "reference_url": "http://localhost:28084/byteblaze/a11y-syntax-highlighting/-/issues/1", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 182, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "homepage content" + }, + "intent": "Open my latest created issue that has homepage content in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/issues/719", + "program_html": [], + "reference_answer_raw_annotation": "closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 183, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "SKU", + "N": "10" + }, + "intent": "Give me the SKU of the products that have 10 units left", + "require_reset": false, + "eval": { + 
"eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no product that has 10 quantities left.", + "reference_answer_raw_annotation": "There is no product that has 10 quantities left." + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 184, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "name", + "N": "0" + }, + "intent": "Give me the name of the products that have 0 units left", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Sinbad Fitness Tank" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sinbad Fitness Tank" + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 185, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "brand", + "N": "3" + }, + "intent": "Give me the brand of the products that have 3 units left", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Eos", + "Minerva" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Eos, Minerva" + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 186, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": 
"http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "product names and the sizes", + "N": "2-3" + }, + "intent": "Give me the product names and the sizes of the products that have 2-3 units left", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Eos V-Neck Hoodie: S", + "Minera Luma Tech V-Tee: XS" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Eos V-Neck Hoodie: S Minera Luma Tech V-Tee: XS" + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 187, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "SKU", + "N": "1-3" + }, + "intent": "Give me the SKU of the products that have 1-3 units left", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "WH11-S-Blue", + "WS08-XS-Blue" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "WH11-S-Blue, WS08-XS-Blue" + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping" + ], + "task_id": 188, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "cancelled" + }, + "intent": "Tell me the total cost of my latest cancelled order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + 
"must_include": [ + "365.42" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "365.42" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping" + ], + "task_id": 189, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "pending" + }, + "intent": "Tell me the total cost of my latest pending order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "754.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "754.99" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping" + ], + "task_id": 190, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "complete" + }, + "intent": "Tell me the total cost of my latest complete order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "65.32" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "65.32" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping" + ], + "task_id": 191, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "processing" + }, + "intent": "Tell me the total cost of my latest processing order?", + "require_reset": false, + "eval": { + 
"eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no order of \"processing\" status", + "reference_answer_raw_annotation": "There is no order of \"processing\" status" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping" + ], + "task_id": 192, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "non-cancelled" + }, + "intent": "Tell me the total cost of my latest non-cancelled order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "754.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "754.99" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 193, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Get the total payment amount of the last {{N}} {{status}} orders", + "instantiation_dict": { + "status": "completed", + "N": "2" + }, + "intent": "Get the total payment amount of the last 2 completed orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "182.4" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "182.4" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 194, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Get the total payment 
amount of the last {{N}} {{status}} orders", + "instantiation_dict": { + "status": "completed", + "N": "5" + }, + "intent": "Get the total payment amount of the last 5 completed orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "555.2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "555.2" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 195, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Get the total payment amount of the last {{N}} {{status}} orders", + "instantiation_dict": { + "status": "pending", + "N": "5" + }, + "intent": "Get the total payment amount of the last 5 pending orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "885.4" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "885.4" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 196, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Compare the payment difference of the last {{N}} {{status_1}} orders and {{status_2}} orders", + "instantiation_dict": { + "status_1": "cancelled", + "status_2": "completed", + "N": "4" + }, + "intent": "Compare the payment difference of the last 4 cancelled orders and completed orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "194.25" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "194.25" + }, + 
"intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 197, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Get the total payment amount of the last {{N}} {{status}} orders", + "instantiation_dict": { + "status": "non-cancelled", + "N": "5" + }, + "intent": "Get the total payment amount of the last 5 non-cancelled orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "778.2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "annotation_note": "219.4+210+166.4+93.4+89", + "reference_answer_raw_annotation": "778.2" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 198, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "customer name", + "status": "most recent cancelled" + }, + "intent": "Get the customer name of the most recent cancelled order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Lily Potter" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lily Potter" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 199, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "order ID", + "status": "newest pending" + }, + "intent": "Get the order ID of the newest pending order", + 
"require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "299" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "299" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 200, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "billing name", + "status": "oldest complete" + }, + "intent": "Get the billing name of the oldest complete order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "John Lee" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "John Lee" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 201, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "customer name", + "status": "earliest fraud suspect" + }, + "intent": "Get the customer name of the earliest fraud suspect order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no order of \"fraud suspect\" status", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 202, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + 
"intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "date", + "status": "most recent canlled" + }, + "intent": "Get the date of the most recent canlled order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "May 23 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "May 23, 2023" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 203, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "purchase date and order id", + "status": "most recent pending" + }, + "intent": "Get the purchase date and order id of the most recent pending order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "order id: 000000299", + "purchase date: May 31, 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "000000299, May 31, 2023, 2:55:09 AM" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 204, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "product name and discounted price (low to high)", + "status": "most recent completed" + }, + "intent": "Get the product name and discounted price (low to high) of the most recent completed order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": 
[ + "Rapha Sports Short: $35", + "Thorpe Track Pant: $54.4", + "Mach Street Sweatshirt: $62" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Rapha Sports Short: $35 Thorpe Track Pant: $54.4 Mach Street Sweatshirt: $62" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 205, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make on {{date}}?", + "instantiation_dict": { + "user": "kilian", + "date": "3/5/2023" + }, + "intent": "How many commits did kilian make on 3/5/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 320 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 206, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make on {{date}}?", + "instantiation_dict": { + "user": "Eric", + "date": "3/2" + }, + "intent": "How many commits did Eric make on 3/2?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 320 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 207, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make on 
{{date}} in total?", + "instantiation_dict": { + "user": "Eric and Kilian", + "date": "1/3/2023" + }, + "intent": "How many commits did Eric and Kilian make on 1/3/2023 in total?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 320 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 208, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "+1 2058812302" + }, + "intent": "Find the customer name and email with phone number +1 2058812302", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "John Smith", + "john.smith.xyz@gmail.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "John Smith, john.smith.xyz@gmail.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 209, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "2137418080" + }, + "intent": "Find the customer name and email with phone number 2137418080", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Jennifer White", + "jennifer.white@yahoo.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Jennifer White, 
jennifer.white@yahoo.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 210, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "2065555555" + }, + "intent": "Find the customer name and email with phone number 2065555555", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Adam Garcia", + "gamingpro456@gmail.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Adam Garcia, gamingpro456@gmail.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 211, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "8015551212" + }, + "intent": "Find the customer name and email with phone number 8015551212", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Sean Miller", + "sean.miller@gmail.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sean Miller, sean.miller@gmail.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 212, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "555-229-3326" + }, + 
"intent": "Find the customer name and email with phone number 555-229-3326", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Veronica Costello", + "roni_cost@example.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Veronica Costello, roni_cost@example.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 213, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Antonia Racer Tank" + }, + "intent": "What are the key aspects that the customers don't like about Antonia Racer Tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Not suitable for high-impact workouts" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Not suitable for high-impact workouts" + }, + "intent_template_id": 249 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 214, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Zing Jump Rope" + }, + "intent": "What are the key aspects that the customers don't like about Zing Jump Rope", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "It is hard to find the right size. 
Won't last long" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "It is hard to find the right size. Won't last long" + }, + "intent_template_id": 249 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 215, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Circe ice fleece" + }, + "intent": "What are the key aspects that the customers don't like about Circe ice fleece", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Material quality, fit, insufficient warmth, color" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Material quality, fit, insufficient warmth, color" + }, + "intent_template_id": 249 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 216, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Electra Bra Top" + }, + "intent": "What are the key aspects that the customers don't like about Electra Bra Top", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Not true to size" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Not true to size" + }, + "intent_template_id": 249 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 217, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": 
"http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Pursuit Tone Band" + }, + "intent": "What are the key aspects that the customers don't like about Pursuit Tone Band", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Insufficient resistance for their workouts." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Insufficient resistance for their workouts." + }, + "intent_template_id": 249 + }, + { + "sites": [ + "map" + ], + "task_id": 218, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the walking distance from nearby hotels to {{location}} that take at most {{n}} minutes?", + "instantiation_dict": { + "location": "CMU, Pittsburgh", + "n": "5" + }, + "intent": "Show me the walking distance from nearby hotels to CMU, Pittsburgh that take at most 5 minutes?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no hotel near CMU that is within 5 minutes walking distance", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 41 + }, + { + "sites": [ + "map" + ], + "task_id": 219, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the walking distance from nearby hotels to {{location}} that take at most {{n}} minutes?", + "instantiation_dict": { + "location": "Pittsburgh airport", + "n": "3" + }, + "intent": "Show me the walking distance from nearby hotels to Pittsburgh airport that take at most 3 minutes?", + "require_reset": 
false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no hotel near CMU that is within 5 minutes walking distance", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 41 + }, + { + "sites": [ + "map" + ], + "task_id": 220, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the walking distance from nearby hotels to {{location}} that take at most {{n}} minutes?", + "instantiation_dict": { + "location": "Gardner Steel Conference Center,", + "n": 5 + }, + "intent": "Show me the walking distance from nearby hotels to Gardner Steel Conference Center, that take at most 5 minutes?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Wyndham Pittsburgh University Cente: 375m", + "The Oaklander Hotel: 338m" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Wyndham Pittsburgh University Cente: 375 m\nThe Oaklander Hotel: 338 m" + }, + "intent_template_id": 41 + }, + { + "sites": [ + "map" + ], + "task_id": 221, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I am at CMU Pittsburgh, how long it takes to the nearest {{location}} with different transportation methods?", + "instantiation_dict": { + "location": "USPS postal office" + }, + "intent": "I am at CMU Pittsburgh, how long it takes to the nearest USPS postal office with different transportation methods?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Walk: 1 minute", + "Drive: less than 1 minute", + "Bike: less than 1 minute" + ] + }, + "reference_url": "", + 
"program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Walk: 1 minute to walk and\nDrive: less than 1 minute\nBike: less than 1 minute" + }, + "intent_template_id": 35 + }, + { + "sites": [ + "map" + ], + "task_id": 222, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I am at CMU Pittsburgh, how long it takes to drive to the nearest {{location}}", + "instantiation_dict": { + "location": "cold stone ice cream" + }, + "intent": "I am at CMU Pittsburgh, how long it takes to drive to the nearest cold stone ice cream", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3min" + }, + "intent_template_id": 35 + }, + { + "sites": [ + "map" + ], + "task_id": 223, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I am at CMU Pittsburgh, how long it takes to drive to the nearest {{location}}", + "instantiation_dict": { + "location": "Mcdonald's" + }, + "intent": "I am at CMU Pittsburgh, how long it takes to drive to the nearest Mcdonald's", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "4min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4min" + }, + "intent_template_id": 35 + }, + { + "sites": [ + "map" + ], + "task_id": 224, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I am at CMU Pittsburgh, how long it takes to drive to the nearest {{location}}", + "instantiation_dict": { + "location": "wendys" + }, + "intent": "I am at CMU Pittsburgh, 
how long it takes to drive to the nearest wendys", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3min" + }, + "intent_template_id": 35 + }, + { + "sites": [ + "shopping" + ], + "task_id": 225, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What do customers say about {{product_type}} from {{manufature}}", + "instantiation_dict": { + "product_type": "brush", + "manufature": "sephora" + }, + "intent": "What do customers say about brush from sephora", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The sephora brushes don't have reviews", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 135 + }, + { + "sites": [ + "shopping" + ], + "task_id": 226, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "Amazon basic" + }, + "intent": "What is the price range for products from Amazon basic?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "5.49", + "375.19" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$5.49 - $375.19" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 227, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + 
"intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "EYZUTAK" + }, + "intent": "What is the price range for products from EYZUTAK?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "9.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$9.99" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 228, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "sephora" + }, + "intent": "What is the price range for products from sephora?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "18.18", + "94.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$18.18 - $94.99" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 229, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "ugreen" + }, + "intent": "What is the price range for products from ugreen?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "6.99", + "38.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$6.99 - $38.99" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 230, + "require_login": true, + "storage_state": 
"./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "Perricone MD" + }, + "intent": "What is the price range for products from Perricone MD?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "35", + "149" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$35 - $149" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 231, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "cancelled" + }, + "intent": "Get the order number of my most recent cancelled order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "170" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "000000170" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "shopping" + ], + "task_id": 232, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "pending" + }, + "intent": "Get the order number of my most recent pending order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "189" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "000000189" + }, + "intent_template_id": 213 + }, + { + "sites": [ 
+ "shopping" + ], + "task_id": 233, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "complete" + }, + "intent": "Get the order number of my most recent complete order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "180" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "000000180" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "shopping" + ], + "task_id": 234, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "on hold" + }, + "intent": "Get the order number of my most recent on hold order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "there is no on hold order", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "shopping" + ], + "task_id": 235, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "under delivery" + }, + "intent": "Get the order number of my most recent under delivery order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no under 
delivery order", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "map" + ], + "task_id": 236, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Where is the nearest {{location}} from {{location2}} {{condition}}", + "instantiation_dict": { + "location": "pharmacy", + "location2": "Carnegie Mellon", + "condition": "I can walk within 20mins" + }, + "intent": "Where is the nearest pharmacy from Carnegie Mellon I can walk within 20mins", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Schiller's Pharmacy", + "811", + "South Aiken Avenue", + "Shadyside", + "Pittsburgh" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Schiller's Pharmacy, 811, South Aiken Avenue, Shadyside, Pittsburgh, Allegheny County, 15232, United States" + }, + "intent_template_id": 39 + }, + { + "sites": [ + "map" + ], + "task_id": 237, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Where is the nearest {{location}} from {{location2}} {{condition}}", + "instantiation_dict": { + "location": "gas station", + "location2": "CMU", + "condition": "" + }, + "intent": "Where is the nearest gas station from CMU ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Sunoco", + "North Craig Street", + "North Oakland", + "Pittsburgh" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sunoco, North Craig Street, North Oakland, Pittsburgh, Allegheny County, 15213, United States" + }, + "intent_template_id": 39 + }, + { + "sites": [ + "shopping" + ], + "task_id": 238, + "require_login": true, + 
"storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "PS4 accessories" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from PS4 accessories category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/astro-gaming-a50-wireless-headset-base-station-gen-4-compatible-with-ps5-ps4-pc-mac-black-silver.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping" + ], + "task_id": 239, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "nutrition bars and drinks" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from nutrition bars and drinks category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/kellogg-s-special-k-protein-meal-bars-chocolate-caramel-12-7oz-6-count.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping" + ], + "task_id": 240, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + 
"instantiation_dict": { + "product_category": "competative swimwear" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from competative swimwear category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/women-cross-flower-beachwear-tankini-bandeau-bandage-bikini-set-push-up-swimwear-bathing-suit-two-pieces-swimsuits.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping" + ], + "task_id": 241, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "skin care tool" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from skin care tool category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/professional-medi-spa-scar-stretch-mark-reduction-system.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping" + ], + "task_id": 242, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "Household Supplies" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from Household Supplies category", + "require_reset": false, + "eval": { + "eval_types": [ + 
"url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/lynx-battery-12v-200ah-lithium-iron-phosphate-lifepo4-prismatic-deep-cell-battery-set-of-4-3-2v-cells-with-3-bus-bars-and-8-lug-nuts-for-rv-solar-marine-off-grid-applications.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 243, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "email address", + "product": "Circe fleece" + }, + "intent": "Show me the email address of the customer who is the most unhappy with Circe fleece", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "hannah.lim@gmail.com" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "hannah.lim@gmail.com" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 244, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "email address", + "product": "Olivia zip jacket" + }, + "intent": "Show me the email address of the customer who is the most unhappy with Olivia zip jacket", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "emma.lopez@gmail.com" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "emma.lopez@gmail.com" + }, + 
"intent_template_id": 244 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 245, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "name", + "product": "Antonia racer tank" + }, + "intent": "Show me the name of the customer who is the most unhappy with Antonia racer tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Shaunte" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Shaunte" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 246, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "name", + "product": "Chloe tank" + }, + "intent": "Show me the name of the customer who is the most unhappy with Chloe tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Teofila" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Teofila" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 247, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "email address", + "product": "the style 
of Zoe products" + }, + "intent": "Show me the email address of the customer who is the most unhappy with the style of Zoe products", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "Valorie doesn't have a email in the system", + "program_html": [], + "string_note": "There is no negative review for Zoe products", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "map" + ], + "task_id": 248, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "Carnegie Mellon Caf\u00e9" + }, + "intent": "Tell me the coordinates of Carnegie Mellon Caf\u00e9 in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.442", + "-79.939" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.4424191, -79.9397388" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 249, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "Western Pennsylvania Hospital Heliport" + }, + "intent": "Tell me the coordinates of Western Pennsylvania Hospital Heliport in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.460", + "-79.946" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.46076, -79.94666" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + 
"task_id": 250, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "Apple Store near Pitt" + }, + "intent": "Tell me the coordinates of Apple Store near Pitt in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.451", + "-79.933" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.4511693, -79.9334241" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 251, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "bus stop on the Carnegie art museum side of the street near CMU" + }, + "intent": "Tell me the coordinates of bus stop on the Carnegie art museum side of the street near CMU in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.444", + "-79.948" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.4443, -79.94889" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 252, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "Tokyo Japanese Food Store in Pittsburgh" + }, + "intent": "Tell me the coordinates of Tokyo Japanese Food Store in Pittsburgh in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + 
"reference_answers": { + "must_include": [ + "40.457", + "-79.929" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.45761, -79.92934" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 253, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "Carnegie Mellon Caf\u00e9", + "information": "phone number" + }, + "intent": "What is the phone number of Carnegie Mellon Caf\u00e9", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no such information in the map", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "map" + ], + "task_id": 254, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "Western Pennsylvania Hospital", + "information": "phone number" + }, + "intent": "What is the phone number of Western Pennsylvania Hospital", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "4125785000" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4125785000" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "map" + ], + "task_id": 255, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Who is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "PIT airport", + "information": "operator" + 
}, + "intent": "Who is the operator of PIT airport", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Allegheny County Airport Authority" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Allegheny County Airport Authority" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "map" + ], + "task_id": 256, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "Carnegie art museum in pittsburgh", + "information": "website" + }, + "intent": "What is the website of Carnegie art museum in pittsburgh", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "http://web.cmoa.org/" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "http://web.cmoa.org/" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "map" + ], + "task_id": 257, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "Tokyo Japanese Food Store in Pittsburgh", + "information": "hours of operation" + }, + "intent": "What is the hours of operation of Tokyo Japanese Food Store in Pittsburgh", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "We-Su 10:00-17:00" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "We-Su 10:00-17:00" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 258, + "require_login": true, + "storage_state": 
"./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "See all public projects", + "instantiation_dict": {}, + "intent": "See all public projects", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/explore", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 325 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 259, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Get me my RSS feed token", + "instantiation_dict": {}, + "intent": "Get me my RSS feed token", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "TMN_bBn9Z48qVbUFZV45" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "TMN_bBn9Z48qVbUFZV45" + }, + "intent_template_id": 312 + }, + { + "sites": [ + "shopping" + ], + "task_id": 260, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Video Game" + }, + "intent": "I want to browse the products in the Video Game category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/video-games.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "shopping" + ], + "task_id": 261, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} 
category", + "instantiation_dict": { + "category": "Headphones" + }, + "intent": "I want to browse the products in the Headphones category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/electronics/headphones.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "shopping" + ], + "task_id": 262, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Men shoes" + }, + "intent": "I want to browse the products in the Men shoes category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/clothing-shoes-jewelry/men/shoes.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "shopping" + ], + "task_id": 263, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Woman clothing" + }, + "intent": "I want to browse the products in the Woman clothing category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/clothing-shoes-jewelry/women/clothing.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "shopping" + ], + "task_id": 264, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I 
want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Cabinets, Racks & Shelves" + }, + "intent": "I want to browse the products in the Cabinets, Racks & Shelves category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/office-products/office-furniture-lighting/cabinets-racks-shelves.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 265, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What's the closest national park to {{city}}? How far is it to drive there?", + "instantiation_dict": { + "city": "Boston" + }, + "intent": "What's the closest national park to Boston? How far is it to drive there?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Acadia National Park", + "457km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Acadia National Park\n457km" + }, + "intent_template_id": 85 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 266, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What's the closest national park to {{city}}?", + "instantiation_dict": { + "city": "the largest city in Maine" + }, + "intent": "What's the closest national park to the largest city in Maine?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Acadia National Park" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Acadia National Park" + }, + "intent_template_id": 85 + }, 
+ { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 267, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What's the closest national park to {{city}}? How long it takes to drive there?", + "instantiation_dict": { + "city": "the hometown of Stephen King" + }, + "intent": "What's the closest national park to the hometown of Stephen King? How long it takes to drive there?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Acadia National Park" + ], + "fuzzy_match": [ + "1h 23min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Acadia National Park\n1h 23min" + }, + "intent_template_id": 85 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 268, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What's the closest national park to {{city}}? How long does it take to bike there?", + "instantiation_dict": { + "city": "Vinalhaven, ME" + }, + "intent": "What's the closest national park to Vinalhaven, ME? 
How long does it take to bike there?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Acadia National Park" + ], + "fuzzy_match": [ + "10h 33min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Acadia National Park\n10h 33min" + }, + "intent_template_id": 85 + }, + { + "sites": [ + "shopping" + ], + "task_id": 269, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "25", + "product_category": "women shoes" + }, + "intent": "Show me products under $25 in \"women shoes\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/clothing-shoes-jewelry/women/shoes.html?price=0-25", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 270, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "30", + "product_category": "men shoes" + }, + "intent": "Show me products under $30 in \"men shoes\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/clothing-shoes-jewelry/men/shoes.html?price=0-30", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 271, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + 
"start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "46.99", + "product_category": "makeup remover" + }, + "intent": "Show me products under $46.99 in \"makeup remover\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/beauty-personal-care/makeup/makeup-remover.html?price=0-46.99", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 272, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "78", + "product_category": "children dental care" + }, + "intent": "Show me products under $78 in \"children dental care\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/beauty-personal-care/oral-care/children-s-dental-care.html?price=0-78", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 273, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "199", + "product_category": "furtiture with accent" + }, + "intent": "Show me products under $199 in \"furtiture with accent\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": 
"http://localhost:28082/home-kitchen/furniture/accent-furniture.html?price=0-199", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 274, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "usb wifi" + }, + "intent": "Search for \"usb wifi\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/catalogsearch/result/?q=usb+wifi", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 275, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "xbox" + }, + "intent": "Search for \"xbox\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/catalogsearch/result/?q=xbox", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 276, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "switch accessories" + }, + "intent": "Search for \"switch accessories\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/catalogsearch/result/?q=switch+accessories", + "program_html": [], + "url_note": "GOLD in PRED" + }, + 
"intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 277, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "batteries for iphone 13" + }, + "intent": "Search for \"batteries for iphone 13\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/catalogsearch/result/?q=iphone+13", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 278, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "green tea bag for weight loss" + }, + "intent": "Search for \"green tea bag for weight loss\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/catalogsearch/result/?q=green+tea+bag+for+weight+loss", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 279, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Provide me with the complete names of Bluetooth headphones from Sony, and also share the price range for the available models", + "instantiation_dict": {}, + "intent": "Provide me with the complete names of Bluetooth headphones from Sony, and also share the price range for the available models", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "SONY WH1000XM3 
Bluetooth Wireless Noise Canceling Headphones Silver WH-1000XM3/S (Renewed)", + "Sony WH-CH710N/H Wireless Bluetooth Noise Cancelling Headphones", + "Sony WH-1000XM3B Wireless Bluetooth Noise-Canceling Over-Ear Headphones (Black) Basic Headphone Bundle Kit with Stylus", + "Sony Wireless Headphones WH-CH510: Wireless Bluetooth On-Ear Headset with Mic for Phone-Call, Black", + "Sony WHCH710N Wireless Bluetooth Noise Canceling Over-The-Ear Headphones (Black) with Kratos 18W PD Two-Port Power Adapter and Kratos 6-Feet Nylon Braided USB-C Cable Bundle (3 Items)", + "Sony WI-SP500 Wireless in-Ear Sports Headphones, White (WISP500/W)", + "Sony WI-SP510 Extra BASS Wireless in-Ear Headset/Headphones with mic for Phone Call Sports IPX5 Bluetooth, Black (WISP510/B)", + "Sony MDRAS600BT Active Sports Bluetooth Headset (Black)", + "Sony WH-1000XM4 Wireless Noise Canceling Over-Ear Headphones (Black) with Sony WLA-NS7 Wireless TV Adapter Bundle (2 Items)", + "Sony WI-C300 Wireless In-Ear Headphones, Red (WIC300/R)", + "Sony XB950N1 Extra Bass Wireless Noise Canceling Headphones, Black", + "SONY - H900N Hi-Res Noise Cancelling Wireless Headphone Grayish Black Renewed", + "18.99", + "406" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These models are avaiable: SONY WH1000XM3 Bluetooth Wireless Noise Canceling Headphones Silver WH-1000XM3/S (Renewed) Sony WH-CH710N/H Wireless Bluetooth Noise Cancelling Headphones Sony WH-1000XM3B Wireless Bluetooth Noise-Canceling Over-Ear Headphones (Black) Basic Headphone Bundle Kit with Stylus Sony Wireless Headphones WH-CH510: Wireless Bluetooth On-Ear Headset with Mic for Phone-Call, Black Sony WHCH710N Wireless Bluetooth Noise Canceling Over-The-Ear Headphones (Black) with Kratos 18W PD Two-Port Power Adapter and Kratos 6-Feet Nylon Braided USB-C Cable Bundle (3 Items) Sony WI-SP500 Wireless in-Ear Sports Headphones, White (WISP500/W) Sony WI-SP510 Extra BASS Wireless in-Ear 
Headset/Headphones with mic for Phone Call Sports IPX5 Bluetooth, Black (WISP510/B) Sony MDRAS600BT Active Sports Bluetooth Headset (Black) Sony WH-1000XM4 Wireless Noise Canceling Over-Ear Headphones (Black) with Sony WLA-NS7 Wireless TV Adapter Bundle (2 Items) Sony WI-C300 Wireless In-Ear Headphones, Red (WIC300/R) Sony XB950N1 Extra Bass Wireless Noise Canceling Headphones, Black SONY - H900N Hi-Res Noise Cancelling Wireless Headphone Grayish Black Renewed The price ranges from $18.99 to $406 " + }, + "intent_template_id": 204 + }, + { + "sites": [ + "shopping" + ], + "task_id": 280, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Provide me with the full names of chargers from Anker, and also share the price range for the available models", + "instantiation_dict": {}, + "intent": "Provide me with the full names of chargers from Anker, and also share the price range for the available models", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Anker USB C Charger 30W, 711 Charger, Compact Fast Charger (Not Foldable) for MacBook Air/iPhone 13/13 Mini/13 Pro/13 Pro Max/12, Galaxy S21, Note 20, iPad Pro, Pixel, and More", + "Anker USB C Charger 40W, 521 Charger (Nano Pro), PIQ 3.0 Durable Compact Fast Charger (Not Foldable) for iPhone 13/13 Mini/13 Pro/13 Pro Max/12, Galaxy, Pixel 4/3, iPad/iPad Mini (Cable Not Included)", + "Anker PowerCore Speed 20000, 20000mAh Qualcomm Quick Charge 3.0 & PowerIQ Portable Charger, with Quick Charge Recharging, Power Bank for Samsung, iPhone, iPad and More, Black (A1278)", + "5Ft Micro-USB Charger Cord Cable Fit for Anker-PowerCore 5000 10000 20100 13000 26800 Mini 3350 Fusion II 15000 Redux 20000 Slim 10000 Astro E1 AC Replacement Power Adapter Supply", + "Anker 10W Max Wireless Charger, 313 Wireless Charger (Pad), Qi-Certified Wireless Charging 
7.5W for iPhone 12/12 Pro/12 mini/12 Pro Max, 10W for Galaxy S10 S9 S8, S9 Plus, Note 9 (No AC Adapter)", + "Anker Wireless Charger, 313 Wireless Charger (Stand), Qi-Certified for iPhone 12, 12 Pro Max, SE, 11, 11 Pro, 11 Pro Max, XR, XS Max, 10W Fast-Charging Galaxy S20, S10 (No AC Adapter)", + "USB Charger, Anker Elite Dual Port 24W Wall Charger, PowerPort 2 with PowerIQ and Foldable Plug, for iPhone 11/Xs/XS Max/XR/X/8/7/6/Plus, iPad Pro/Air 2/Mini 3/Mini 4, Samsung S4/S5, and More", + "iPhone 12 Charger [GaN Tech], Anker 30W Compact USB-C Wall Charger with Power Delivery, PowerPort Atom for iPhone 12 / Mini/Pro/Pro Max / 11 / X/XS/XR, iPad Pro, MacBook 12'', Pixel, Galaxy", + "USB C Charger, Anker 30W 2 Port Fast Charger with 18W USB C Power Adapter, Foldable PowerPort PD 2 Charger for iPad Pro, iPhone 11/11 Pro / 11 Pro Max/XS/Max/XR/X, Pixel, Galaxy, and More", + "Anker 40W 5-Port USB Wall Charger, PowerPort 5 for iPhone XS / XS Max / XR / X / 8 / 7 / 6 / Plus, iPad Pro / Air 2 / mini, Galaxy S9 / S8 / Edge / Plus, Note 8 / 7, LG, Nexus, HTC and More, Black (AK-A2124111)", + "Anker Quick Charge 3.0 39W Dual USB Wall Charger, PowerPort Speed 2 for Galaxy S10/S9/S8/Edge/Plus, Note 8/7 and PowerIQ for iPhone Xs/XS Max/XR/X/8/Plus, iPad Pro/Air 2/Mini, LG, Nexus, HTC and More", + "USB C Charger, Anker 20W PIQ 3.0 Fast Charger with Foldable Plug, PowerPort III Charger for iPhone 13/13 Mini/13 Pro/13 Pro Max/12/11, iPad/iPad Mini, MagSafe, and More (Cable Not Included)", + "8.99", + "59.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These models are availiable: Anker USB C Charger 30W, 711 Charger, Compact Fast Charger (Not Foldable) for MacBook Air/iPhone 13/13 Mini/13 Pro/13 Pro Max/12, Galaxy S21, Note 20, iPad Pro, Pixel, and More Anker USB C Charger 40W, 521 Charger (Nano Pro), PIQ 3.0 Durable Compact Fast Charger (Not Foldable) for iPhone 13/13 Mini/13 Pro/13 Pro Max/12, Galaxy, Pixel 4/3, 
iPad/iPad Mini (Cable Not Included) Anker PowerCore Speed 20000, 20000mAh Qualcomm Quick Charge 3.0 & PowerIQ Portable Charger, with Quick Charge Recharging, Power Bank for Samsung, iPhone, iPad and More, Black (A1278) 5Ft Micro-USB Charger Cord Cable Fit for Anker-PowerCore 5000 10000 20100 13000 26800 Mini 3350 Fusion II 15000 Redux 20000 Slim 10000 Astro E1 AC Replacement Power Adapter Supply Anker 10W Max Wireless Charger, 313 Wireless Charger (Pad), Qi-Certified Wireless Charging 7.5W for iPhone 12/12 Pro/12 mini/12 Pro Max, 10W for Galaxy S10 S9 S8, S9 Plus, Note 9 (No AC Adapter) Anker Wireless Charger, 313 Wireless Charger (Stand), Qi-Certified for iPhone 12, 12 Pro Max, SE, 11, 11 Pro, 11 Pro Max, XR, XS Max, 10W Fast-Charging Galaxy S20, S10 (No AC Adapter) USB Charger, Anker Elite Dual Port 24W Wall Charger, PowerPort 2 with PowerIQ and Foldable Plug, for iPhone 11/Xs/XS Max/XR/X/8/7/6/Plus, iPad Pro/Air 2/Mini 3/Mini 4, Samsung S4/S5, and More iPhone 12 Charger [GaN Tech], Anker 30W Compact USB-C Wall Charger with Power Delivery, PowerPort Atom for iPhone 12 / Mini/Pro/Pro Max / 11 / X/XS/XR, iPad Pro, MacBook 12'', Pixel, Galaxy USB C Charger, Anker 30W 2 Port Fast Charger with 18W USB C Power Adapter, Foldable PowerPort PD 2 Charger for iPad Pro, iPhone 11/11 Pro / 11 Pro Max/XS/Max/XR/X, Pixel, Galaxy, and More Anker 40W 5-Port USB Wall Charger, PowerPort 5 for iPhone XS / XS Max / XR / X / 8 / 7 / 6 / Plus, iPad Pro / Air 2 / mini, Galaxy S9 / S8 / Edge / Plus, Note 8 / 7, LG, Nexus, HTC and More, Black (AK-A2124111) Anker Quick Charge 3.0 39W Dual USB Wall Charger, PowerPort Speed 2 for Galaxy S10/S9/S8/Edge/Plus, Note 8/7 and PowerIQ for iPhone Xs/XS Max/XR/X/8/Plus, iPad Pro/Air 2/Mini, LG, Nexus, HTC and More USB C Charger, Anker 20W PIQ 3.0 Fast Charger with Foldable Plug, PowerPort III Charger for iPhone 13/13 Mini/13 Pro/13 Pro Max/12/11, iPad/iPad Mini, MagSafe, and More (Cable Not Included) Magnetic Wireless Charger, Anker Wireless Charger 
with 5ft Built-in USB-C Cable, PowerWave Magnetic Pad, 7.5W Charging for iPhone 13 / 13 Pro / 13 Pro Max / 13 mini / 12 / 12 Pro (No AC Adapter) USB C Super Fast Charger, Anker 25W PD Wall Charger Fast Charging for Samsung Galaxy S21/S21+/S21 Ultra/S20/Z Flip/Note20/20 Ultra/Note10/10+/S9/S8/S10e, iPad Pro 12.9, and More (Cable not Included) The price ranges from $8.99 to $59.99" + }, + "intent_template_id": 204 + }, + { + "sites": [ + "shopping" + ], + "task_id": 281, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Please provide me with the complete product names of Oral B brush heads designed for children, along with their corresponding price range per brush", + "instantiation_dict": {}, + "intent": "Please provide me with the complete product names of Oral B brush heads designed for children, along with their corresponding price range per brush", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Oral-B Kids Extra Soft Replacement Brush Heads featuring STAR WARS, 2 count", + "Kids By Oral-b Stages Power Star Wars Replacement Heads 4 Pack", + "3.745", + "6.495" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These models are availiable: Oral-B Kids Extra Soft Replacement Brush Heads featuring STAR WARS, 2 count Kids By Oral-b Stages Power Star Wars Replacement Heads 4 Pack The price ranges from $3.745 to $6.495 " + }, + "intent_template_id": 204 + }, + { + "sites": [ + "shopping" + ], + "task_id": 282, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "List the full product names of slide slippers from Nike and tell me the price range of the available products", + "instantiation_dict": {}, + "intent": 
"List the full product names of slide slippers from Nike and tell me the price range of the available products", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Nike Men's Air Max Camden Slide Sandal", + "Nike Men's Benassi JDI Fanny Pack Slides", + "Nike Victori One Mens Comfort Slide Cn9675-003 (Midnight Navy/Midnight Navy/White, Numeric_10)", + "Nike Offcourt Slide Mens Bq4639-002 Size 12", + "Nike Jordan Men's Break Slide Red AR6374-602", + "Nike Victori One Slide Mens Style : Dd9559-300", + "Nike Men's Benassi Solarsoft Slide Athletic Sandal (Black/White, numeric_14)", + "Nike Men's Benassi Solarsoft Slide Athletic Sandal (Midnight Navy/Blue, numeric_8)", + "Nike womens Benassi Just Do It", + "27.6", + "90.65" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These models are availiable: Nike Men's Air Max Camden Slide Sandal Nike Men's Benassi JDI Fanny Pack Slides Nike Victori One Mens Comfort Slide Cn9675-003 (Midnight Navy/Midnight Navy/White, Numeric_10) Nike Offcourt Slide Mens Bq4639-002 Size 12 Nike Jordan Men's Break Slide Red AR6374-602 Nike Victori One Slide Mens Style : Dd9559-300 Nike Men's Benassi Solarsoft Slide Athletic Sandal (Black/White, numeric_14) Nike Men's Benassi Solarsoft Slide Athletic Sandal (Midnight Navy/Blue, numeric_8) Nike womens Benassi Just Do It The price ranges from $27.6 to $90.65" + }, + "intent_template_id": 204 + }, + { + "sites": [ + "shopping" + ], + "task_id": 283, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Look up the most recent models of XBox controllers released between 2020-2021?", + "instantiation_dict": {}, + "intent": "Look up the most recent models of XBox controllers released between 2020-2021?", + "require_reset": false, + "eval": { + "eval_types": [ + 
"url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/microsoft-xbox-controller-carbon-black-for-series-x-series-s-xbox-one-windows-10-android-ios-bundled-with-dual-port-charging-dock-xbox-controller-skin-voucher-premgear-cloth.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 210 + }, + { + "sites": [ + "shopping" + ], + "task_id": 284, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show the least expensive {{product}} with a minimum storage capacity of {{min_storage}}.", + "instantiation_dict": { + "product": "shoe storage", + "min_storage": "12 pairs" + }, + "intent": "Show the least expensive shoe storage with a minimum storage capacity of 12 pairs.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/onlyeasy-over-the-door-shoe-storage-organizer-hanging-shoe-rack-holder-with-24-large-fabric-pockets-22-1-x-61-4-herringbone-grey-mxrodsb1p.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 207 + }, + { + "sites": [ + "shopping" + ], + "task_id": 285, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show the least expensive {{product}} with a minimum storage capacity of {{min_storage}}.", + "instantiation_dict": { + "product": "switch card holder", + "min_storage": "15 cards" + }, + "intent": "Show the least expensive switch card holder with a minimum storage capacity of 15 cards.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/game-card-holder-storage-case-for-nintendo-switch-games-or-ps-vita-game-case-or-sd-memory-cards-black.html", + 
"program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 207 + }, + { + "sites": [ + "shopping" + ], + "task_id": 286, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show the least expensive {{product}} with a minimum storage capacity of {{min_storage}}.", + "instantiation_dict": { + "product": "ssd hard drive", + "min_storage": "1TB" + }, + "intent": "Show the least expensive ssd hard drive with a minimum storage capacity of 1TB.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/external-hard-drive-2tb-ultra-thin-external-hard-drive-2000gb-ultra-high-speed-portable-3-1-type-c-storage-drive-compatible-with-pc-laptop-and-mac-2tb-a1.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 207 + }, + { + "sites": [ + "map" + ], + "task_id": 287, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "How much time does it take from Pittsburgh to Philadelphia by car?", + "instantiation_dict": {}, + "intent": "How much time does it take from Pittsburgh to Philadelphia by car?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "5h 47min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5h 47min" + }, + "intent_template_id": 47 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 288, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": 
"name" + }, + "intent": "Tell me the name of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Samantha Jones" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Samantha Jones" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 289, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "email address, name, phone number" + }, + "intent": "Tell me the email address, name, phone number of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "email: coolcat321@hotmail.com", + "name: Samantha Jones", + "phone number: 3055551212" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "email: coolcat321@hotmail.com name: Samantha Jones phone number: 3055551212" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 290, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "product SKUs in the most recent cancelled orders" + }, + "intent": "Tell me the product SKUs in the most recent cancelled orders of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + 
"string_match" + ], + "reference_answers": { + "must_include": [ + "WSH09-29-White", + "WSH09-28-Green", + "MSH11-34-Blue", + "WP09-29-Purple" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "WSH09-29-White,WSH09-28-Green,MSH11-34-Blue,WP09-29-Purple" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 291, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "total spend on products in the most recent cancelled orders" + }, + "intent": "Tell me the total spend on products in the most recent cancelled orders of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "148" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$148" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 292, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "total number of cancellations" + }, + "intent": "Tell me the total number of cancellations of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "9" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "9" + }, + 
"intent_template_id": 234 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 293, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "Super_Awesome_Robot" + }, + "intent": "Show me the command to clone Super_Awesome_Robot with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/convexegg/super_awesome_robot.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/convexegg/super_awesome_robot.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 294, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "ChatGPT" + }, + "intent": "Show me the command to clone ChatGPT with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/convexegg/chatgpt.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/convexegg/chatgpt.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 295, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "metaseq" + }, + "intent": "Show me the command to clone metaseq 
with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/root/metaseq.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/root/metaseq.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 296, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "the best GAN python implementation" + }, + "intent": "Show me the command to clone the best GAN python implementation with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "ssh://git@metis.lti.cs.cmu.edu:2222/eriklindernoren/PyTorch-GAN.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "ssh://git@metis.lti.cs.cmu.edu:2222/eriklindernoren/PyTorch-GAN.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 297, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "the most stared Covid location tracker" + }, + "intent": "Show me the command to clone the most stared Covid location tracker with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "ssh://git@metis.lti.cs.cmu.edu:2222/yjlou/2019-nCov.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": 
"ssh://git@metis.lti.cs.cmu.edu:2222/yjlou/2019-nCov.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "shopping" + ], + "task_id": 298, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "completed" + }, + "intent": "Show the most recent completed order", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/sales/order/view/order_id/180/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "shopping" + ], + "task_id": 299, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "cancelled" + }, + "intent": "Show the most recent cancelled order", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/sales/order/view/order_id/170/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "shopping" + ], + "task_id": 300, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "pending" + }, + "intent": "Show the most recent pending order", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/sales/order/view/order_id/189/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 180 + }, + { + "sites": [ 
+ "shopping" + ], + "task_id": 301, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "processing" + }, + "intent": "Show the most recent processing order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": {"fuzzy_match": "N/A"}, + "reference_url": "", + "program_html": [], + "string_note": "there is no order in processing" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "shopping" + ], + "task_id": 302, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "out of delivery" + }, + "intent": "Show the most recent out of delivery order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": {"fuzzy_match": "N/A"}, + "reference_url": "", + "program_html": [], + "string_note": "there is no order in processing" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 303, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Kilian", + "period": "durning 2023" + }, + "intent": "How many commits did Kilian make durning 2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 304, + 
"require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Eric", + "period": "between Feb 2023 and May 2023" + }, + "intent": "How many commits did Eric make between Feb 2023 and May 2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "14" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "14" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 305, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Philip", + "period": "in 2023/1" + }, + "intent": "How many commits did Philip make in 2023/1?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 306, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Anthony", + "period": "between 08/2022-09/2022" + }, + "intent": "How many commits did Anthony make between 08/2022-09/2022?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + 
"reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 307, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Nic", + "period": "in April 2021" + }, + "intent": "How many commits did Nic make in April 2021?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "16" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 308, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "primer/design" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the primer/design project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Shawn Allen" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Shawn Allen" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 309, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": 
"thoughtbot/administrate" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the thoughtbot/administrate project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Grayson Wright" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Grayson Wright" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 310, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "AndroidSlidingUpPanel" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the AndroidSlidingUpPanel project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "tokudu" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "tokudu" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 311, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "Pytorch GAN" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the Pytorch GAN project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Erik Linder-Nor\u00e9n" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Erik 
Linder-Nor\u00e9n" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 312, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "csvkit" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the csvkit project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Christopher Groskopf" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Christopher Groskopf" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "shopping" + ], + "task_id": 313, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Which number to call for the customer service?", + "instantiation_dict": {}, + "intent": "Which number to call for the customer service?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no phone number in the website", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 134 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 314, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "prime/design", + "attribute": "name" + }, + "intent": "List the name of the top 3 contributors to prime/design repo, ranked by the 
number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Shawn Allen", + "Inayaili Le\u00f3n", + "Aurora Pleguezuelo" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Shawn Allen, Inayaili Le\u00f3n, Aurora Pleguezuelo" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 315, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "Pytorch GAN", + "attribute": "email address" + }, + "intent": "List the email address of the top 3 contributors to Pytorch GAN repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "eriklindernoren@live.se", + "eriklindernoren@gmail.com", + "pinnacle.chen@qq.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "eriklindernoren@live.se, eriklindernoren@gmail.com, pinnacle.chen@qq.com" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 316, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "facebook's guide on building react apps", + "attribute": "name" + }, + "intent": "List the name of the top 3 contributors to facebook's guide on building react apps repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + 
"reference_answers": { + "must_include": [ + "Ian Sutherland", + "Joe Hadda", + "Dan Abramov" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Ian Sutherland, Joe Hadda, Dan Abramov" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 317, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "metaseq", + "attribute": "name and number of commits" + }, + "intent": "List the name and number of commits of the top 3 contributors to metaseq repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Susan Zhang: 70", + "Stephen Roller: 51", + "Peter Albert: 12" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Susan Zhang: 70, Stephen Roller: 51, Peter Albert: 12" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 318, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "2019-nCov", + "attribute": "last names" + }, + "intent": "List the last names of the top 3 contributors to 2019-nCov repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lo", + "Chen", + "Chu" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lo, Chen, Chu" 
+ }, + "intent_template_id": 324 + }, + { + "sites": [ + "shopping" + ], + "task_id": 319, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}}, including shipping fee", + "instantiation_dict": { + "time": "April 2022" + }, + "intent": "How much refund I should expect from my order canlled in April 2022, including shipping fee", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 320, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}}, including shipping fee", + "instantiation_dict": { + "time": "Feb 2023" + }, + "intent": "How much refund I should expect from my order canlled in Feb 2023, including shipping fee", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "406.53" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "406.53" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 321, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}}, including shipping fee", + "instantiation_dict": { + "time": "2022" + }, + "intent": "How much refund I should expect from my order canlled in 2022, including shipping fee", 
+ "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "3053.97" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3053.97" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 322, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}} if I cannot get the shipping fee refunded?", + "instantiation_dict": { + "time": "May 2023" + }, + "intent": "How much refund I should expect from my order canlled in May 2023 if I cannot get the shipping fee refunded?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "350.42" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "350.42" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 323, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}}? I only kept the AC-DC Adapter and the shop told me that I cannot get the shipping fee back", + "instantiation_dict": { + "time": "2022/03" + }, + "intent": "How much refund I should expect from my order canlled in 2022/03? 
I only kept the AC-DC Adapter and the shop told me that I cannot get the shipping fee back", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "264.49" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "264.49" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 324, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "chairs", + "sorting_order": "ascending price" + }, + "intent": "Show me the \"chairs\" listings by ascending price.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/catalogsearch/result/index/?product_list_order=price&q=chairs&product_list_dir=asc", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 325, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "mouth night guard", + "sorting_order": "descending price" + }, + "intent": "Show me the \"mouth night guard\" listings by descending price.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/catalogsearch/result/index/?q=mouth%20night%20guard%20&product_list_order=price", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 326, + "require_login": true, + 
"storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "Canon photo printer", + "sorting_order": "search relevance, from most to least" + }, + "intent": "Show me the \"Canon photo printer\" listings by search relevance, from most to least.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/catalogsearch/result/?q=Canon+photo+printer", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 327, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "iphone 12 phone case", + "sorting_order": "name alphabetically" + }, + "intent": "Show me the \"iphone 12 phone case\" listings by name alphabetically.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/catalogsearch/result/index/?q=%20iphone%2012%20phone%20case&product_list_order=name", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 328, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "iphone 12 phone case", + "sorting_order": "price" + }, + "intent": "Show me the \"iphone 12 phone case\" listings by price.", + "require_reset": false, + "eval": { + "eval_types": [ + 
"url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/catalogsearch/result/index/?product_list_order=price&q=%20iphone%2012%20phone%20case", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 329, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", + "instantiation_dict": { + "time": "on 4/19/2023" + }, + "intent": "How much I spend on 4/19/2023 on shopping at One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 330, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", + "instantiation_dict": { + "time": "in March 2023" + }, + "intent": "How much I spend in March 2023 on shopping at One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "81.31" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "81.31" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 331, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", + "instantiation_dict": { + "time": "in July 2022" + }, + "intent": 
"How much I spend in July 2022 on shopping at One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.16" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.16" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 332, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", + "instantiation_dict": { + "time": "each month from Jan to the end of March 2023" + }, + "intent": "How much I spend each month from Jan to the end of March 2023 on shopping at One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Jan: 572.8", + "Feb: 762.18", + "Mar: 83.31" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Jan: 572.8\nFeb: 762.18\nMar: 83.31" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 333, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much did I spend on shopping at One Stop Market {{time}}? They gave me a 20% discount on the total amount for orders exceeding $200 in cash", + "instantiation_dict": { + "time": "on November 2022" + }, + "intent": "How much did I spend on shopping at One Stop Market on November 2022? 
They gave me a 20% discount on the total amount for orders exceeding $200 in cash", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "359.546" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "359.546" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 334, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "muffin cornbread mix" + }, + "intent": "Tell me when I last ordered my muffin cornbread mix?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "March 11th 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "March 11th 2023" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "shopping" + ], + "task_id": 335, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "body butter" + }, + "intent": "Tell me when I last ordered my body butter?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January 16th 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January 16th 2023" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "shopping" + ], + "task_id": 336, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + 
"intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "conditioner" + }, + "intent": "Tell me when I last ordered my conditioner?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January 16th 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January 16th 2023" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "shopping" + ], + "task_id": 337, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "bread olive" + }, + "intent": "Tell me when I last ordered my bread olive?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "December 12th 2022" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "December 12th 2022" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "shopping" + ], + "task_id": 338, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "toothpaste" + }, + "intent": "Tell me when I last ordered my toothpaste?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "December 4th 2022" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "December 4th 2022" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 339, + "require_login": true, + "storage_state": 
"./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "that report bugs" + }, + "intent": "List all opened issues that report bugs", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/issues/?label_name%5B%5D=bug", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 340, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/primer/design", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "that report bugs" + }, + "intent": "List all opened issues that report bugs", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/primer/design/-/issues/?label_name%5B%5D=type%3A%20bug%20%F0%9F%90%9E", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 341, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/root/metaseq", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "requesting new features" + }, + "intent": "List all opened issues requesting new features", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/root/metaseq/-/issues/?label_name%5B%5D=enhancement", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 299 + }, + { + 
"sites": [ + "gitlab" + ], + "task_id": 342, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/root/metaseq", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "that ask about OPT model related questions" + }, + "intent": "List all opened issues that ask about OPT model related questions", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/root/metaseq/-/issues/?search=OPT&label_name%5B%5D=question", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 343, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/root/metaseq", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "that don't have any labels" + }, + "intent": "List all opened issues that don't have any labels", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/root/metaseq/-/issues/?label_name%5B%5D=None", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 344, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "by far" + }, + "intent": "How many reviews our shop received by far?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "351" + ] + }, + "reference_url": "", + "program_html": [], + 
"string_note": "", + "reference_answer_raw_annotation": "351" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 345, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "in Apr 2023" + }, + "intent": "How many reviews our shop received in Apr 2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "351" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "351" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 346, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "during 2022" + }, + "intent": "How many reviews our shop received during 2022?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 347, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "from the beginning of the shop" + }, + "intent": "How many reviews our shop received from the beginning of the shop?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": 
{ + "must_include": [ + "351" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "351" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 348, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "in May 2023" + }, + "intent": "How many reviews our shop received in May 2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 349, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Who else have access to my repo {{repo}}, show me their usernames", + "instantiation_dict": { + "repo": "gimmiethat.space" + }, + "intent": "Who else have access to my repo gimmiethat.space, show me their usernames", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "yjlou" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "yjlou" + }, + "intent_template_id": 298 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 350, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Who else have access to my repo {{repo}}, show me their usernames", + "instantiation_dict": { + "repo": "prism-theme" + }, + "intent": "Who else have access to my repo prism-theme, show me their usernames", + 
"require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "abisubramanya27" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Abishek S, abisubramanya27" + }, + "intent_template_id": 298 + }, + { + "sites": [ + "shopping" + ], + "task_id": 351, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "PS4 accessories", + "order": "ascending" + }, + "intent": "List products from PS4 accessories category by ascending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/video-games/playstation-4/accessories.html?product_list_order=price", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "shopping" + ], + "task_id": 352, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "nutrition bars and drinks", + "order": "ascending" + }, + "intent": "List products from nutrition bars and drinks category by ascending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/health-household/diet-sports-nutrition/nutrition-bars-drinks.html?product_list_order=price", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "shopping" + ], + "task_id": 353, + "require_login": true, + "storage_state": 
"./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "competative swimwear", + "order": "ascending" + }, + "intent": "List products from competative swimwear category by ascending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/clothing-shoes-jewelry/sport-specific-clothing/competitive-swimwear.html?product_list_order=price", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "shopping" + ], + "task_id": 354, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "living room furtniture", + "order": "descending" + }, + "intent": "List products from living room furtniture category by descending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/home-kitchen/furniture/living-room-furniture.html?product_list_order=price&product_list_dir=desc", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "shopping" + ], + "task_id": 355, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "kids' bedding", + "order": "descending" + }, + "intent": "List products from kids' bedding category by descending price", + "require_reset": false, + 
"eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/home-kitchen/bedding/kids-bedding.html?product_list_dir=desc", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "map" + ], + "task_id": 356, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show the route from SCS CMU in Pittsburgh to the location where the Declaration of Independence and Constitution were signed", + "instantiation_dict": {}, + "intent": "Show the route from SCS CMU in Pittsburgh to the location where the Declaration of Independence and Constitution were signed", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Gates and Hillman Centers", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Independence Hall", + "Philadelphia" + ] + } + } + ] + }, + "intent_template_id": 49 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 357, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Checkout merge requests requiring my review", + "instantiation_dict": {}, + "intent": "Checkout merge requests requiring my review", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": 
"http://localhost:28084/dashboard/merge_requests?reviewer_username=byteblaze", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 291 + }, + { + "sites": [ + "shopping" + ], + "task_id": 358, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "shipping method", + "order_number": 187 + }, + "intent": "Show me the shipping method for order number 187.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Flat Rate - Fixed" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Flat Rate - Fixed" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "shopping" + ], + "task_id": 359, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "order date", + "order_number": "148" + }, + "intent": "Show me the order date for order number 148.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "1/29/2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1/29/2023" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "shopping" + ], + "task_id": 360, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "product names", + "order_number": "148" + }, + "intent": "Show me the product 
names for order number 148.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Bornbridge Artificial Spiral Topiary Tree - Indoor / Outdoor Topiary Trees - Artificial Outdoor Plants (2 Pack, 4' Cypress)", + "Russound 5B45W 4\" Indoor Outdoor Speakers White" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Bornbridge Artificial Spiral Topiary Tree - Indoor / Outdoor Topiary Trees - Artificial Outdoor Plants (2 Pack, 4' Cypress), Russound 5B45W 4\" Indoor Outdoor Speakers White" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "shopping" + ], + "task_id": 361, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "order statuses", + "order_number": "170 and 189" + }, + "intent": "Show me the order statuses for order number 170 and 189.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "170: cancelled", + "189: pending" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "170: cancelled, 189: pending" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "shopping" + ], + "task_id": 362, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "billing address", + "order_number": "00178" + }, + "intent": "Show me the billing address for order number 00178.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "101 S San 
Mateo Dr", + "San Mateo", + "California", + "94010", + "United States" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Emma Lopez, 101 S San Mateo Dr, San Mateo, California, 94010, United States" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "map" + ], + "task_id": 363, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "Carnegie Mellon University", + "location/address_2": "Carnegie Music Hall" + }, + "intent": "Measure distance between Carnegie Mellon University and Carnegie Music Hall by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "748m" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "748m" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "map" + ], + "task_id": 364, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "Carnegie Mellon University", + "location/address_2": "UPMC Shadyside" + }, + "intent": "Measure distance between Carnegie Mellon University and UPMC Shadyside by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "1.7km" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1.7km" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "map" + ], + "task_id": 365, + "require_login": true, + "storage_state": null, + "start_url": 
"https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "Carnegie Music Hall", + "location/address_2": "UPMC Shadyside" + }, + "intent": "Measure distance between Carnegie Music Hall and UPMC Shadyside by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "2.2km" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2.2km" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "map" + ], + "task_id": 366, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "CVS (closet one)", + "location/address_2": "UPMC Shadyside" + }, + "intent": "Measure distance between CVS (closet one) and UPMC Shadyside by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "1.2km" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1.2km" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "map" + ], + "task_id": 367, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "Carnegie Mellon University", + "location/address_2": "CVS (closet one)" + }, + "intent": "Measure distance between Carnegie Mellon University and CVS (closet one) by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + 
], + "reference_answers": { + "exact_match": "1.4km" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1.4km" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "shopping" + ], + "task_id": 368, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "find discounted items.", + "instantiation_dict": {}, + "intent": "find discounted items.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no function to show only discount items", + "reference_answer_raw_annotation": "There is no function to show only discount items." + }, + "intent_template_id": 188 + }, + { + "sites": [ + "map" + ], + "task_id": 369, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "Carnegie Music Hall" + }, + "intent": "Pull up the description page of Carnegie Music Hall on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Carnegie Music Hall" + ] + } + } + ] + }, + "intent_template_id": 52 + }, + { + "sites": [ + "map" + ], + "task_id": 370, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "Carnegie Mellon University" + }, + "intent": "Pull up the description 
page of Carnegie Mellon University on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Carnegie Mellon University" + ] + } + } + ] + }, + "intent_template_id": 52 + }, + { + "sites": [ + "map" + ], + "task_id": 371, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "Piada restaurant near Pitt" + }, + "intent": "Pull up the description page of Piada restaurant near Pitt on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Piada Italian Street Food", + "Forbes Avenue" + ] + } + } + ] + }, + "intent_template_id": 52 + }, + { + "sites": [ + "map" + ], + "task_id": 372, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "the Costco in Pittsburhg near a river" + }, + "intent": "Pull up the description page of the Costco in Pittsburhg near a river on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Costco", + "Waterfront Drive West" + ] + } + } + ] + }, + 
"intent_template_id": 52 + }, + { + "sites": [ + "map" + ], + "task_id": 373, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "Whole Foods near Carnegie Mellon" + }, + "intent": "Pull up the description page of Whole Foods near Carnegie Mellon on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Whole Foods", + "East Liberty" + ] + } + } + ] + }, + "intent_template_id": 52 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 374, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Preview the {{name}} theme for my shop", + "instantiation_dict": { + "name": "Magento Blank" + }, + "intent": "Preview the Magento Blank theme for my shop", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/admin/system_design_theme/edit/id/1", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 266 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 375, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Preview the {{name}} theme for my shop", + "instantiation_dict": { + "name": "Magento Luma" + }, + "intent": "Preview the Magento Luma theme for my shop", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + 
"reference_url": "http://localhost:28083/admin/admin/system_design_theme/edit/id/3/key/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 266 + }, + { + "sites": [ + "shopping" + ], + "task_id": 376, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Summarize customer reviews for {{product}}.", + "instantiation_dict": { + "product": "Amazon Echo Dot 3rd generation" + }, + "intent": "Summarize customer reviews for Amazon Echo Dot 3rd generation.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no review for this product", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 182 + }, + { + "sites": [ + "map" + ], + "task_id": 377, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "CMU ArtPark Lab", + "space": "resturants" + }, + "intent": "Find the resturants around CMU ArtPark Lab", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "https://www.openstreetmap.org/search?query=restaurants%20near%20CMU%20ArtPark%20Lab", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 378, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "CMU main campus", + "space": "parking" + }, + "intent": "Find the parking around CMU main campus", + "require_reset": false, + "eval": { + 
"eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "https://www.openstreetmap.org/search?query=parking%20near%20carnegie%20mellon%20university", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 379, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "CMU main campus", + "space": "hotel" + }, + "intent": "Find the hotel around CMU main campus", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "https://www.openstreetmap.org/search?query=hotels%20near%20carnegie%20mellon%20university", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 380, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "Carnegie Music Hall", + "space": "bar" + }, + "intent": "Find the bar around Carnegie Music Hall", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "https://www.openstreetmap.org/search?query=bars%20near%20Carnegie%20Music%20Hall", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 381, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "Carnegie Music Hall", + "space": "hotel" + }, + "intent": "Find the hotel around Carnegie Music Hall", + "require_reset": 
false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "https://www.openstreetmap.org/search?query=hotels%20near%20Carnegie%20Music%20Hall", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 382, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I am arriving at Carnegie Mellon University. Find the nearby US Citizenship and Immigration Services and the walking distance to the nearest Social Security Administration from US Citizenship and Immigration Services", + "instantiation_dict": {}, + "intent": "I am arriving at Carnegie Mellon University. Find the nearby US Citizenship and Immigration Services and the walking distance to the nearest Social Security Administration from US Citizenship and Immigration Services", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no USCIS nearby", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 781 + }, + { + "sites": [ + "map" + ], + "task_id": 383, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I am arriving at Pittsburgh Airport. Show me the name of a Hyatt hotel if there is any nearby. Tell me the names of supermarkets that are within 15mins driving from the hotel", + "instantiation_dict": {}, + "intent": "I am arriving at Pittsburgh Airport. Show me the name of a Hyatt hotel if there is any nearby. 
Tell me the names of supermarkets that are within 15mins driving from the hotel", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hyatt Regency Pittsburgh International Airport", + "Giant Eagle", + "ALDI" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hyatt Regency Pittsburgh International Airport Giant Eagle, ALDI" + }, + "intent_template_id": 782 + }, + { + "sites": [ + "shopping" + ], + "task_id": 384, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "List the customer names who complain about the quality of EYZUTAK phone cases", + "instantiation_dict": {}, + "intent": "List the customer names who complain about the quality of EYZUTAK phone cases", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lisa Lee", + "Evelyn Kurver", + "Amanda", + "N Randall" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lisa Lee, Evelyn Kurver, Amanda, N Randall" + }, + "intent_template_id": 666 + }, + { + "sites": [ + "shopping" + ], + "task_id": 385, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "List the customer names who thinks EYZUTAK phone cases are of good looking", + "instantiation_dict": {}, + "intent": "List the customer names who thinks EYZUTAK phone cases are of good looking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lisa Lee", + "MH", + "Misba009", + "Amanda", + "N Randall", + "Amazon Customer", + "Cally", + "Bethany Robertson" + ] + }, + "reference_url": "", + "program_html": [], + 
"string_note": "", + "reference_answer_raw_annotation": "Lisa Lee, MH, Misba009, Amanda, N Randall, Amazon Customer, Cally, Bethany Robertson" + }, + "intent_template_id": 666 + }, + { + "sites": [ + "shopping" + ], + "task_id": 386, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the rating of {{product}}", + "instantiation_dict": { + "product": "Ugreen lightning to 3.5mm cable" + }, + "intent": "What is the rating of Ugreen lightning to 3.5mm cable. Please round to the nearest whole number", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "65 |OR| 3" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Around 3.1 stars" + }, + "intent_template_id": 1355 + }, + { + "sites": [ + "shopping" + ], + "task_id": 387, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Who gave {{stars}} for phone cases from EYZUTAK", + "instantiation_dict": { + "stars": "4 or 5 stars" + }, + "intent": "Who gave 4 or 5 stars for phone cases from EYZUTAK", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "MH", + "Misba009", + "Amanda", + "Amazon Customer", + "Cally", + "Bethany Robertson" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "MH, Misba009, Amanda, Amazon Customer, Cally, Bethany Robertson, Bethany Robertson" + }, + "intent_template_id": 1356 + }, + { + "sites": [ + "shopping" + ], + "task_id": 388, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Who gave {{stars}} for 
phone cases from EYZUTAK", + "instantiation_dict": { + "stars": "1 or 2 stars" + }, + "intent": "Who gave 1 or 2 stars for phone cases from EYZUTAK", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Evelyn Kurver", + "N Randall" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Evelyn Kurver, N Randall" + }, + "intent_template_id": 1356 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 389, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "octovisuals page", + "repo": "primer/design", + "content": "Thanks, working on reviews" + }, + "intent": "Post \"Thanks, working on reviews\" for the merge request related to octovisuals page in primer/design project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/primer/design/-/merge_requests/450", + "locator": "", + "required_contents": { + "must_include": [ + "Thanks, working on reviews" + ] + } + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 390, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "semantic HTML post", + "repo": "a11yproject/a11yproject.com", + "content": "lgtm" + }, + "intent": "Post \"lgtm\" for the merge request related to semantic HTML post in a11yproject/a11yproject.com project", + "require_reset": false, + "eval": { + "eval_types": [ + 
"program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/a11yproject/a11yproject.com/-/merge_requests/1531", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "lgtm" + } + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 391, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "focus edge cases", + "repo": "a11yproject/a11yproject.com", + "content": "close because non reproducible" + }, + "intent": "Post \"close because non reproducible\" for the merge request related to focus edge cases in a11yproject/a11yproject.com project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/a11yproject/a11yproject.com/-/merge_requests/1265", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "close because non reproducible" + } + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 392, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "color ulitity", + "repo": "a11yproject.com", + "content": "Good idea" + }, + "intent": "Post \"Good idea\" for the merge request related to color ulitity in a11yproject.com project", 
+ "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/a11yproject/a11yproject.com/-/merge_requests/1071", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "Good idea" + } + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 393, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "fixing the broken links", + "repo": "byteblaze/empathy-prompts", + "content": "lgtm" + }, + "intent": "Post \"lgtm\" for the merge request related to fixing the broken links in byteblaze/empathy-prompts project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/empathy-prompts/-/merge_requests/19", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "lgtm" + } + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 394, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "2019-nCov" + }, + "intent": "Fork 2019-nCov.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/2019-nCov", 
+ "locator": "", + "required_contents": { + "must_include": [ + "2019-nCov" + ] + } + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 395, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "the Pytorch GAN repo with most stars" + }, + "intent": "Fork the Pytorch GAN repo with most stars.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/PyTorch-GAN", + "locator": "", + "required_contents": { + "must_include": [ + "Pytorch-GAN" + ] + } + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 396, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "ChatGPT" + }, + "intent": "Fork ChatGPT.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/ChatGPT", + "locator": "", + "required_contents": { + "must_include": [ + "ChatGPT" + ] + } + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 397, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "MetaSeq" + }, + "intent": "Fork MetaSeq.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/metaseq", + 
"locator": "", + "required_contents": { + "must_include": [ + "metaseq" + ] + } + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 398, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "all source repos from Akilesh Kannan" + }, + "intent": "Fork all source repos from Akilesh Kannan", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/SimCache", + "locator": "", + "required_contents": { + "must_include": [ + "SimCache" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/dots", + "locator": "", + "required_contents": { + "must_include": [ + "dots" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/CacheEval", + "locator": "", + "required_contents": { + "must_include": [ + "CacheEval" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/nvidia-patch", + "locator": "", + "required_contents": { + "must_include": [ + "404" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/viewgrades-scraper", + "locator": "", + "required_contents": { + "must_include": [ + "404" + ] + } + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "reddit" + ], + "task_id": 399, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "I am a robot" + }, + "intent": "Change my reddit bio to \"I am a robot\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/user/MarvelsGrantMan136", + 
"locator": "document.querySelector(\".user-bio__biography\").outerText", + "required_contents": { + "exact_match": "I am a robot" + } + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 400, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "Pro Python Developer with 20 years of Experience" + }, + "intent": "Change my reddit bio to \"Pro Python Developer with 20 years of Experience\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/user/MarvelsGrantMan136", + "locator": "document.querySelector(\".user-bio__biography\").outerText", + "required_contents": { + "exact_match": "Pro Python Developer with 20 years of Experience" + } + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 401, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "Seeking SDE positions" + }, + "intent": "Change my reddit bio to \"Seeking SDE positions\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/user/MarvelsGrantMan136", + "locator": "document.querySelector(\".user-bio__biography\").outerText", + "required_contents": { + "exact_match": "Seeking SDE positions" + } + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 402, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + 
"geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "Freelance Web Developer" + }, + "intent": "Change my reddit bio to \"Freelance Web Developer\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/user/MarvelsGrantMan136", + "locator": "document.querySelector(\".user-bio__biography\").outerText", + "required_contents": { + "exact_match": "Freelance Web Developer" + } + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 403, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "Awesome Prompt Artist" + }, + "intent": "Change my reddit bio to \"Awesome Prompt Artist\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/user/MarvelsGrantMan136", + "locator": "document.querySelector(\".user-bio__biography\").outerText", + "required_contents": { + "exact_match": "Awesome Prompt Artist" + } + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 404, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "books" + }, + "intent": "Upvote the newest post in books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"http://localhost:28080/f/books/124260/adults-reading-to-each-other-out-loud", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 405, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "DIY" + }, + "intent": "Upvote the newest post in DIY subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/DIY/119019/how-can-i-bring-an-hdmi-cable-from-my-pc-downstairs-to-my-tv", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 406, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "future technology" + }, + "intent": "Upvote the newest post in future technology subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/Futurology/119517/openai-ceo-it-s-not-funny-that-i-m-afraid-of-the-ai-we-re", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + 
"vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 407, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "deeplearning" + }, + "intent": "Upvote the newest post in deeplearning subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/deeplearning/124993/meta-s-llama-weights-leaked-on-torrent-and-the-best-thing", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 408, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "explain like im 5" + }, + "intent": "Upvote the newest post in explain like im 5 subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/explainlikeimfive/39244/eli5-how-does-pinching-a-ribbon-and-sliding-your-finger", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 409, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + 
"start_url": "http://localhost:28080/f/books/59421/friendly-reminder-bookshop-org-exists", + "geolocation": null, + "intent_template": "Reply to {{position_description}} in this post with \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the manager of the website", + "content_description": "thanks! I am a big fan of your website." + }, + "intent": "Reply to the manager of the website in this post with \"thanks! I am a big fan of your website.\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/books/59421/-/comment/1235250", + "locator": "", + "required_contents": { + "must_include": [ + "thanks! I am a big fan of your website." + ] + } + } + ] + }, + "intent_template_id": 23 + }, + { + "sites": [ + "reddit" + ], + "task_id": 410, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080/f/singularity/69404/this-is-how-chatgpt-sees-itself", + "geolocation": null, + "intent_template": "Reply to {{position_description}} in this post with \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the first reply", + "content_description": "don't panic" + }, + "intent": "Reply to the first reply in this post with \"don't panic\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/singularity/69404/-/comment/1042264", + "locator": "", + "required_contents": { + "must_include": [ + "don't panic" + ] + } + } + ] + }, + "intent_template_id": 23 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 411, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to 
MIT license.", + "instantiation_dict": { + "repo": "byteblaze/cloud-to-butt" + }, + "intent": "Make the LICENSE of byteblaze/cloud-to-butt to MIT license.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/cloud-to-butt/-/blob/master/LICENSE.txt", + "locator": "", + "required_contents": { + "must_include": [ + "MIT license" + ] + } + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 412, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to Apache License", + "instantiation_dict": { + "repo": "byteblaze/accessible-html-content-patterns" + }, + "intent": "Make the LICENSE of byteblaze/accessible-html-content-patterns to Apache License", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/accessible-html-content-patterns/-/blob/main/LICENSE", + "locator": "", + "required_contents": { + "must_include": [ + "Apache License", + "http://www.apache.org/licenses/LICENSE-2.0" + ] + } + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 413, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to one that mandates all copies and derivative works to be under the same license", + "instantiation_dict": { + "repo": "byteblaze/a11y-syntax-highlighting" + }, + "intent": "Make the LICENSE of byteblaze/a11y-syntax-highlighting to one that mandates all copies and derivative works to be under the same license", + "require_reset": false, + "eval": { + 
"eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/a11y-syntax-highlighting/-/blob/main/LICENSE", + "locator": "", + "required_contents": { + "must_include": [ + "GENERAL PUBLIC LICENSE" + ] + } + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 414, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to MIT license.", + "instantiation_dict": { + "repo": "byteblaze/dotfiles" + }, + "intent": "Make the LICENSE of byteblaze/dotfiles to MIT license.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/dotfiles/-/blob/main/LICENSE", + "locator": "", + "required_contents": { + "must_include": [ + "MIT license", + "The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software." + ] + } + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 415, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Go to the merge request on {{topic}} I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "instantiation_dict": { + "topic": "verification functions" + }, + "intent": "Go to the merge request on verification functions I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. 
Otherwise remind him with a simple @.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/a11y-webring.club/-/merge_requests/40", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "@davepgreene" + } + } + ] + }, + "intent_template_id": 360 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 416, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Go to the merge request on {{topic}} I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "instantiation_dict": { + "topic": "wcag" + }, + "intent": "Go to the merge request on wcag I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. 
Otherwise remind him with a simple @.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/a11yproject/a11yproject.com/-/merge_requests/1270", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "Thank you" + } + } + ] + }, + "intent_template_id": 360 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 417, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Go to the merge request on {{topic}} I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "instantiation_dict": { + "topic": "404 link" + }, + "intent": "Go to the merge request on 404 link I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. 
Otherwise remind him with a simple @.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/a11yproject/a11yproject.com/-/merge_requests/1485", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "@Roshanjossey" + } + } + ] + }, + "intent_template_id": 360 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 418, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Busy" + }, + "intent": "Set my gitlab status as Busy.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze", + "locator": "document.querySelector('.cover-status').lastChild.textContent", + "required_contents": { + "exact_match": "Busy" + } + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 419, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Enjoying life" + }, + "intent": "Set my gitlab status as Enjoying life.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze", + "locator": "document.querySelector('.cover-status').lastChild.textContent", + "required_contents": { + "exact_match": "Enjoying life" + } + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ 
+ "gitlab" + ], + "task_id": 420, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Playing Badminton" + }, + "intent": "Set my gitlab status as Playing Badminton.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze", + "locator": "document.querySelector('.cover-status').lastChild.textContent", + "required_contents": { + "exact_match": "Playing Badminton" + } + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 421, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Resting due to leg injury" + }, + "intent": "Set my gitlab status as Resting due to leg injury.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze", + "locator": "document.querySelector('.cover-status').lastChild.textContent", + "required_contents": { + "exact_match": "Resting due to leg injury" + } + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 422, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Out of Office" + }, + "intent": "Set my gitlab status as Out of Office.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + 
"reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze", + "locator": "document.querySelector('.cover-status').lastChild.textContent", + "required_contents": { + "exact_match": "Out of Office" + } + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 423, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Mark all {{brand}} shirts on sale", + "instantiation_dict": { + "brand": "Hollister" + }, + "intent": "Mark all Hollister shirts on sale", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/126/", + "locator": "document.querySelector('input[name=\"product[sale]\"]').value", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 237 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 424, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the place where Mr. Rogers was filmed" + }, + "intent": "Find the page of the place where Mr. 
Rogers was filmed on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Pittsburgh" + ] + } + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 425, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the longest bridge in the Western hemisphere" + }, + "intent": "Find the page of the longest bridge in the Western hemisphere on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Mackinac Bridge" + ] + } + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 426, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the place in Pennsylvania where a plane crashed during the September 11th attacks" + }, + "intent": "Find the page of the place in Pennsylvania where a plane crashed during the September 11th attacks on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ 
+ "Somerset County" + ] + } + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 427, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the university that has most Turning Award winners" + }, + "intent": "Find the page of the university that has most Turning Award winners on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Massachusetts Institute of Technology" + ] + } + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 428, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the undergrad college of the person who developed the Nash equilibrium" + }, + "intent": "Find the page of the undergrad college of the person who developed the Nash equilibrium on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Carnegie Mellon University" + ] + } + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 429, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the page of 
{{description}} on the map.", + "instantiation_dict": { + "description": "the colleges where The Chair was filmed in Pittsburgh" + }, + "intent": "Find the page of the colleges where The Chair was filmed in Pittsburgh on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Chatham University" + ] + } + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 430, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the college(s) where The Chair was filmed in Pennsylvania other than the ones in Pittsburgh" + }, + "intent": "Find the page of the college(s) where The Chair was filmed in Pennsylvania other than the ones in Pittsburgh on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Washington & Jefferson College" + ] + } + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "shopping" + ], + "task_id": 431, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/tall-pink-taper-candles-4-piece-orange-colored-tapered-candles-gradient-candles-10-6-inches-tall-tie-dye-candle-set-large-dripless-long-burning-candlesticks-two-color-taper-candles-candlesticks.html |AND| 
http://localhost:28082/spaas-white-taper-candles-4-pack-10-inch-tall-candles-scent-free-premium-wax-candle-sticks-8-hour-long-burning-white-candlesticks-for-home-decoration-wedding-holiday-and-parties.html |AND| http://localhost:28082/white-starfish-wall-candle-sconces-set-of-2-beach-decor-ocean-themed-wall-mount-candleholders-nautical-style-beach-bathroom-decor-coastal-farmhouse-seashell-candle-holders.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "SPAAS White Taper Candles - 4 Pack |OR| 10 Inch Tall Candles, Scent-Free Premium Wax Candle Sticks |OR| 8 Hour Long Burning White Candlesticks for Home Decoration, Wedding, Holiday and Parties" + ] + } + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 432, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/ciclon-energy-drink-regular-24-cans-8-3oz.html |AND| http://localhost:28082/v8-energy-healthy-energy-drink-steady-energy-from-black-and-green-tea-pomegranate-blueberry-8-ounce-can-pack-of-24.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"http://localhost:28082/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "V8 +Energy, Healthy Energy Drink, Steady Energy from Black and Green Tea, Pomegranate Blueberry, 8 Ounce Can ,Pack of 24" + ] + } + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 433, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/tazrigo-5pcs-white-dental-resin-brush-pens-dental-shaping-silicone-tooth-tool.html |AND| http://localhost:28082/stylus-pens-for-touch-screens-2-pcs-universal-stylus-2-in-1-2022-updated-touch-screen-pens-for-all-touch-screens-cell-phones-tablets-laptops-with-6-replacement-tips-4-discstips-2-fiber-tips.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Tazrigo 5pcs White Dental Resin Brush Pens Dental Shaping Silicone Tooth Tool" + ] + } + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 434, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/3-pairs-ruffle-socks-lace-ankle-socks-for-girls-frilly-socks-women-decorative.html |AND| http://localhost:28082/viviki-women-glitter-socks-ultrathin-transparent-tulle-lace-socks-no-show-ankle-crew-socks-3-pack.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with 
the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "VIVIKI Women Glitter Socks Ultrathin Transparent Tulle Lace Socks - No Show Ankle Crew Socks 3 Pack" + ] + } + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 435, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/35-ft-hdmi-cable-gearit-pro-series-hdmi-cable-35-feet-high-speed-ethernet-4k-resolution-3d-video-and-arc-audio-return-channel-hdmi-cable-white.html |AND| http://localhost:28082/dp-to-hdmi-cable-6ft-2-pack-fosmon-gold-plated-displayport-to-hdmi-cable-1080p-full-hd-for-pcs-to-hdtv-monitor-projector-with-hdmi-port.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "DP to HDMI Cable 6FT (2 Pack), Fosmon Gold Plated Displayport to HDMI Cable 1080p Full HD for PCs to HDTV, Monitor, Projector with HDMI Port" + ] + } + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 436, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. 
Can you reorder it for me?", + "instantiation_dict": { + "product": "a mattress foundation", + "time": "around Feb or March 2023" + }, + "intent": "I previously ordered some a mattress foundation around Feb or March 2023 and later cancelled. Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07DFJ5XKH" + ] + } + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "shopping" + ], + "task_id": 437, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?", + "instantiation_dict": { + "product": "a table lamp", + "time": "in May 2023" + }, + "intent": "I previously ordered some a table lamp in May 2023 and later cancelled. Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B072XS3F6W" + ] + } + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "shopping" + ], + "task_id": 438, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. 
Can you reorder it for me?", + "instantiation_dict": { + "product": "a TV stand", + "time": "sometime around sep 2022" + }, + "intent": "I previously ordered some a TV stand sometime around sep 2022 and later cancelled. Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B08PVHRRB7" + ] + } + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "shopping" + ], + "task_id": 439, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?", + "instantiation_dict": { + "product": "a cat t-shirt", + "time": "during 2022" + }, + "intent": "I previously ordered some a cat t-shirt during 2022 and later cancelled. Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B0844BWS76" + ] + } + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "shopping" + ], + "task_id": 440, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. 
Can you reorder it for me?", + "instantiation_dict": { + "product": "a make up removal kit", + "time": "during summer 2022" + }, + "intent": "I previously ordered some a make up removal kit during summer 2022 and later cancelled. Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B0738JQG6Q" + ] + } + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 441, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "GIVE ME SPACE" + }, + "intent": "Update the project site's title to \"GIVE ME SPACE\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": { + "must_include": [ + "GIVE ME SPACE" + ] + } + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 442, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "Welcome to my site" + }, + "intent": "Update the project site's title to \"Welcome to my site\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", 
+ "program_html": [ + { + "url": "http://localhost:28084/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": { + "must_include": [ + "Welcome to my site" + ] + } + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 443, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "Not an interesting site" + }, + "intent": "Update the project site's title to \"Not an interesting site\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": { + "must_include": [ + "Not an interesting site" + ] + } + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 444, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "Title Wanted" + }, + "intent": "Update the project site's title to \"Title Wanted\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": { + "must_include": [ + "Title Wanted" + ] + } + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 445, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": 
"http://localhost:28084/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "Hello" + }, + "intent": "Update the project site's title to \"Hello\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": { + "must_include": [ + "Hello" + ] + } + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 446, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} in {{repo}} to {{account}}.", + "instantiation_dict": { + "repo": "a11yproject", + "issue": 404, + "account": "Roshanjossey" + }, + "intent": "Assign the issue regarding 404 in a11yproject to Roshanjossey.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/dashboard/issues?scope=all&state=opened&assignee_username=Roshanjossey", + "locator": "", + "required_contents": { + "must_include": [ + "404s, bad host, timeouts, bad urls for URLs linked from website" + ] + } + } + ] + }, + "intent_template_id": 999 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 447, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} in {{repo}} to {{account}}.", + "instantiation_dict": { + "repo": "a11y-webring.club", + "issue": "linking to an accessibility statement", + "account": "Rohan" + }, + "intent": "Assign the issue regarding linking to an 
accessibility statement in a11y-webring.club to Rohan.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/dashboard/issues?scope=all&state=opened&assignee_username=Seirdy", + "locator": "", + "required_contents": { + "must_include": [ + "linking to an accessibility statement" + ] + } + } + ] + }, + "intent_template_id": 999 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 448, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "https://egg.tart.com" + }, + "intent": "set the homepage URL on my GitLab profile to https://egg.tart.com", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze", + "locator": "document.querySelector('.profile-header [itemprop=\"url\"]').outerText", + "required_contents": { + "exact_match": "egg.tart.com" + } + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 449, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "https://helloworld.xyz" + }, + "intent": "set the homepage URL on my GitLab profile to https://helloworld.xyz", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze", + "locator": "document.querySelector('.profile-header [itemprop=\"url\"]').outerText", + "required_contents": { + 
"exact_match": "helloworld.xyz" + } + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 450, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "a11yproject.contributor.me" + }, + "intent": "set the homepage URL on my GitLab profile to a11yproject.contributor.me", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze", + "locator": "document.querySelector('.profile-header [itemprop=\"url\"]').outerText", + "required_contents": { + "exact_match": "a11yproject.contributor.me" + } + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 451, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "www.byteblaze.com" + }, + "intent": "set the homepage URL on my GitLab profile to www.byteblaze.com", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze", + "locator": "document.querySelector('.profile-header [itemprop=\"url\"]').outerText", + "required_contents": { + "exact_match": "www.byteblaze.com" + } + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 452, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": 
{ + "url": "byteblaze.github.io" + }, + "intent": "set the homepage URL on my GitLab profile to byteblaze.github.io", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze", + "locator": "document.querySelector('.profile-header [itemprop=\"url\"]').outerText", + "required_contents": { + "exact_match": "byteblaze.github.io" + } + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 453, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "Teton pullover hoodie" + }, + "intent": "Disable Teton pullover hoodie from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/78/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": { + "exact_match": "2" + } + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 454, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "Ryker Tee Crew Neck" + }, + "intent": "Disable Ryker Tee Crew Neck from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ 
+ { + "url": "http://localhost:28083/admin/catalog/product/edit/id/478/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": { + "exact_match": "2" + } + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 455, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "lHelios Endurance Tank" + }, + "intent": "Disable lHelios Endurance Tank from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/676/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": { + "exact_match": "2" + } + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 456, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "Cora Pant" + }, + "intent": "Disable Cora Pant from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1840/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": { + "exact_match": "2" + } + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + 
"task_id": 457, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "Karmen yoga pants" + }, + "intent": "Disable Karmen yoga pants from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1819/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": { + "exact_match": "2" + } + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 458, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin/catalog/product/edit/id/1481/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "$5", + "action": "Reduce" + }, + "intent": "Reduce the price of this product by $5", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1481/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "27.00" + } + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 459, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin/catalog/product/edit/id/237/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + 
"amount": "10%", + "action": "Reduce" + }, + "intent": "Reduce the price of this product by 10%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/237/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "62.10" + } + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 460, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin/catalog/product/edit/id/418/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "15%", + "action": "Reduce" + }, + "intent": "Reduce the price of this product by 15%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/418/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "38.25" + } + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 461, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin/catalog/product/edit/id/721/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "$11.5", + "action": "Increase" + }, + "intent": "Increase the price of this product by $11.5", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"http://localhost:28083/admin/catalog/product/edit/id/721/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "29.50" + } + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 462, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin/catalog/product/edit/id/710/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "10%", + "action": "Increase" + }, + "intent": "Increase the price of this product by 10%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/710/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "19.80" + } + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 463, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin/catalog/product/edit/id/996/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "15%", + "action": "Increase" + }, + "intent": "Increase the price of this product by 15%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/996/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "36.80" + } + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 464, + "require_login": true, 
+ "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Update the product description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Antonia Racer Tank" + }, + "intent": "Update the product description of Antonia Racer Tank to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/../antonia-racer-tank.html", + "locator": "document.querySelector('.data.item.content').outerText + (document.querySelector('.product.attribute.overview [itemprop=\"description\"]')?.outerText || '')", + "required_contents": { + "must_include": [ + "This is in regular rotation at the gym", + "Its colorful and looks kinda cute under my exercise tanks", + "it's very stylish for yoga or something else low impact" + ] + } + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping" + ], + "task_id": 465, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "Tide PODS Spring Meadow Scent HE Turbo Laundry Detergent Pacs, 81 Count" + }, + "intent": "Add Tide PODS Spring Meadow Scent HE Turbo Laundry Detergent Pacs, 81 Count to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "Tide PODS Spring Meadow Scent HE Turbo Laundry Detergent Pacs, 81 Count" + ] 
+ } + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping" + ], + "task_id": 466, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts" + }, + "intent": "Add 2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts" + ] + } + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping" + ], + "task_id": 467, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "HONGJ Hawaiian Beach Outfits Set for Mens, Summer Tropical Tree Printed Relaxed-fit Hawaii Shirts Shorts 2 Piece Suits" + }, + "intent": "Add HONGJ Hawaiian Beach Outfits Set for Mens, Summer Tropical Tree Printed Relaxed-fit Hawaii Shirts Shorts 2 Piece Suits to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "HONGJ Hawaiian Beach Outfits Set for Mens, Summer Tropical Tree Printed Relaxed-fit Hawaii Shirts Shorts 2 Piece Suits" + ] + } + } + ] + }, + 
"intent_template_id": 186 + }, + { + "sites": [ + "shopping" + ], + "task_id": 468, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "DkRgVNY Lace Spcling Lingerie Womens Sexy Hollow Out Underwear Bodysuit One Piece Snap Crotch Clubwear Teddy Bodysuit" + }, + "intent": "Add DkRgVNY Lace Spcling Lingerie Womens Sexy Hollow Out Underwear Bodysuit One Piece Snap Crotch Clubwear Teddy Bodysuit to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "DkRgVNY Lace Spcling Lingerie Womens Sexy Hollow Out Underwear Bodysuit One Piece Snap Crotch Clubwear Teddy Bodysuit" + ] + } + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping" + ], + "task_id": 469, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "Light Blue Simple Summer New Low Heels Slippers for Women Fashion Chunky Heels Pointed Toe Wine Glasses Sandals Comfortable Walking Shoes Ladies All-Match Sexy Party Shoes" + }, + "intent": "Add Light Blue Simple Summer New Low Heels Slippers for Women Fashion Chunky Heels Pointed Toe Wine Glasses Sandals Comfortable Walking Shoes Ladies All-Match Sexy Party Shoes to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": 
"document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "Light Blue Simple Summer New Low Heels Slippers for Women Fashion Chunky Heels Pointed Toe Wine Glasses Sandals Comfortable Walking Shoes Ladies All-Match Sexy Party Shoes" + ] + } + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 470, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "302" + }, + "intent": "Cancel order 302", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/302/", + "locator": "document.querySelector(\"#order_status\").outerText", + "required_contents": { + "exact_match": "Canceled" + } + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 471, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "307" + }, + "intent": "Cancel order 307", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/307/", + "locator": "document.querySelector(\"#order_status\").outerText", + "required_contents": { + "exact_match": "Canceled" + } + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 472, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": 
null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "299" + }, + "intent": "Cancel order 299", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/299/", + "locator": "document.querySelector(\"#order_status\").outerText", + "required_contents": { + "exact_match": "Canceled" + } + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 473, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "301" + }, + "intent": "Cancel order 301", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/301/", + "locator": "document.querySelector(\"#order_status\").outerText", + "required_contents": { + "exact_match": "Canceled" + } + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 474, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "305" + }, + "intent": "Cancel order 305", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/305/", + "locator": "document.querySelector(\"#order_status\").outerText", + "required_contents": { + "exact_match": "Canceled" + } + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + 
"gitlab" + ], + "task_id": 475, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "chatgpt_plugin" + }, + "intent": "Set up a new, empty repository with the name chatgpt_plugin?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/chatgpt_plugin", + "locator": "", + "required_contents": { + "must_include": [ + "chatgpt_plugin" + ] + } + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 476, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "awesome_llm_reading" + }, + "intent": "Set up a new, empty repository with the name awesome_llm_reading?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/awesome_llm_reading", + "locator": "", + "required_contents": { + "must_include": [ + "awesome_llm_reading" + ] + } + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 477, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "awesome_program_aided_reasoning" + }, + "intent": "Set up a new, empty repository with the name awesome_program_aided_reasoning?", + 
"require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/awesome_program_aided_reasoning", + "locator": "", + "required_contents": { + "must_include": [ + "awesome_program_aided_reasoning" + ] + } + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 478, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "webagent" + }, + "intent": "Set up a new, empty repository with the name webagent?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/webagent", + "locator": "", + "required_contents": { + "must_include": [ + "webagent" + ] + } + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 479, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "awesome_webagent" + }, + "intent": "Set up a new, empty repository with the name awesome_webagent?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/awesome_webagent", + "locator": "", + "required_contents": { + "must_include": [ + "awesome_webagent" + ] + } + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 480, + "require_login": true, + "storage_state": 
"./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}}", + "instantiation_dict": { + "collaborator_account_list": "yjlou", + "repo": "solarized-prism-theme" + }, + "intent": "Invite yjlou as collaborator to solarized-prism-theme", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/solarized-prism-theme/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "yjlou" + ] + } + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 481, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "Abishek" + }, + "intent": "Abishek wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'abisubramanya27')", + "required_contents": { + "must_include": [ + "Guest" + ] + } + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 482, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. 
Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "yjlou" + }, + "intent": "yjlou wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'yjlou')", + "required_contents": { + "must_include": [ + "Guest" + ] + } + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 483, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "Koushik" + }, + "intent": "Koushik wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'koush')", + "required_contents": { + "must_include": [ + "Guest" + ] + } + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 484, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "Jakub Klinkovsk\u00fd" + }, + "intent": "Jakub Klinkovsk\u00fd wants to check my dotfile configurations. 
Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'lahwaacz')", + "required_contents": { + "must_include": [ + "Guest" + ] + } + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 485, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "Vinta" + }, + "intent": "Vinta wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'vinta')", + "required_contents": { + "must_include": [ + "Guest" + ] + } + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 486, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "404 Not Found", + "heading": "Bruh bro you clicked the wrong page" + }, + "intent": "Change the page title of \"404 Not Found\" page on my site to \"Bruh bro you clicked the wrong page\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", 
+ "program_html": [ + { + "url": "http://localhost:28083/admin/cms/page/edit/page_id/1/", + "locator": "document.querySelector('input[name=\"title\"').value", + "required_contents": { + "exact_match": "Bruh bro you clicked the wrong page" + } + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 487, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "Enable Cookies", + "heading": "Cookie monster coming to your place" + }, + "intent": "Change the page title of \"Enable Cookies\" page on my site to \"Cookie monster coming to your place\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/cms/page/edit/page_id/3/", + "locator": "document.querySelector('input[name=\"title\"').value", + "required_contents": { + "exact_match": "Cookie monster coming to your place" + } + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 488, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "Home Page", + "heading": "This is the home page!! Leave here!!" + }, + "intent": "Change the page title of \"Home Page\" page on my site to \"This is the home page!! 
Leave here!!\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/cms/page/edit/page_id/2/", + "locator": "document.querySelector('input[name=\"title\"').value", + "required_contents": { + "exact_match": "This is the home page!! Leave here!!" + } + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 489, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "Privacy Policy", + "heading": "No privacy policy is needed is this dystopian world" + }, + "intent": "Change the page title of \"Privacy Policy\" page on my site to \"No privacy policy is needed is this dystopian world\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/cms/page/edit/page_id/4/", + "locator": "document.querySelector('input[name=\"title\"').value", + "required_contents": { + "exact_match": "No privacy policy is needed is this dystopian world" + } + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 490, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "About us", + "heading": "Secret" + }, + "intent": "Change the page title of \"About us\" page on my site to \"Secret\".", + "require_reset": false, + "eval": { + "eval_types": [ + 
"program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/cms/page/edit/page_id/5/", + "locator": "document.querySelector('input[name=\"title\"').value", + "required_contents": { + "exact_match": "Secret" + } + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 491, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Sarah Miller", + "message": "the order is ready to be shipped soon!" + }, + "intent": "Notify Sarah Miller in their most recent pending order with message \"the order is ready to be shipped soon!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "System message: We cannot add order history." 
+ }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 492, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Jane Doe", + "message": "sorry we are out of stock, please reorder" + }, + "intent": "Notify Jane Doe in their most recent pending order with message \"sorry we are out of stock, please reorder\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/302/", + "locator": "document.querySelector(\"#order_history_block\").querySelector(\".note-list\").firstElementChild.querySelector(\".note-list-comment\").outerText", + "required_contents": { + "exact_match": "sorry we are out of stock, please reorder" + } + } + ] + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 493, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Grace Nguyen", + "message": "sorry we are bankrupt, please contact our customer service for refund" + }, + "intent": "Notify Grace Nguyen in their most recent pending order with message \"sorry we are bankrupt, please contact our customer service for refund\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/307/", + "locator": 
"document.querySelector(\"#order_history_block\").querySelector(\".note-list\").firstElementChild.querySelector(\".note-list-comment\").outerText", + "required_contents": { + "exact_match": "sorry we are bankrupt, please contact our customer service for refund" + } + } + ] + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 494, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Alex Thomas", + "message": "Yo, your order will be shipped soon!" + }, + "intent": "Notify Alex Thomas in their most recent pending order with message \"Yo, your order will be shipped soon!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/304/", + "locator": "document.querySelector(\"#order_history_block\").querySelector(\".note-list\").firstElementChild.querySelector(\".note-list-comment\").outerText", + "required_contents": { + "exact_match": "Yo, your order will be shipped soon!" + } + } + ] + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 495, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Lily Potter", + "message": "Thanks, your order is ready to be shipped!" 
+ }, + "intent": "Notify Lily Potter in their most recent pending order with message \"Thanks, your order is ready to be shipped!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/303/", + "locator": "document.querySelector(\"#order_history_block\").querySelector(\".note-list\").firstElementChild.querySelector(\".note-list-comment\").outerText", + "required_contents": { + "exact_match": "Thanks, your order is ready to be shipped!" + } + } + ] + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 496, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "8974568499", + "order": "299", + "service": "Federal Express" + }, + "intent": "Update order #299 with the Federal Express tracking number 8974568499", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/commentsHistory/order_id/299/active_tab/order_shipments/", + "locator": "", + "required_contents": { + "must_include": [ + "Tracking number 8974568499 for Federal Express assigned" + ] + } + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 497, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "24353446464", + "order": "307", + "service": "DHL" 
+ }, + "intent": "Update order #307 with the DHL tracking number 24353446464", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/commentsHistory/order_id/307/active_tab/order_shipments/", + "locator": "", + "required_contents": { + "must_include": [ + "Tracking number 24353446464 for DHL assigned" + ] + } + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 498, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "55591023930", + "order": "306", + "service": "UPS" + }, + "intent": "Update order #306 with the UPS tracking number 55591023930", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/commentsHistory/order_id/306/active_tab/order_shipments/", + "locator": "", + "required_contents": { + "must_include": [ + "Tracking number 55591023930 for United Parcel Service assigned" + ] + } + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 499, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "13849373987", + "order": "304", + "service": "USPS" + }, + "intent": "Update order #304 with the USPS tracking number 13849373987", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + 
"reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/commentsHistory/order_id/304/active_tab/order_shipments/", + "locator": "", + "required_contents": { + "must_include": [ + "Tracking number 13849373987 for United States Postal Service assigned" + ] + } + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 500, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "239028439840", + "order": "301", + "service": "DHL" + }, + "intent": "Update order #301 with the DHL tracking number 239028439840", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/commentsHistory/order_id/301/active_tab/order_shipments/", + "locator": "", + "required_contents": { + "must_include": [ + "Tracking number 239028439840 for DHL assigned" + ] + } + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 501, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "Taurus Elements Shell" + }, + "intent": "Make all Taurus Elements Shell as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/350/", + "locator": 
"document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": { + "exact_match": "0" + } + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 502, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "Gobi HeatTec Tee" + }, + "intent": "Make all Gobi HeatTec Tee as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/446/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": { + "exact_match": "0" + } + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 503, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "rocco gym tank" + }, + "intent": "Make all rocco gym tank as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/682/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": { + "exact_match": "0" + } + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 504, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": 
"http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "Selene yoga hoodie" + }, + "intent": "Make all Selene yoga hoodie as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1108/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": { + "exact_match": "0" + } + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 505, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "Aeno capri" + }, + "intent": "Make all Aeno capri as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1861/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": { + "exact_match": "0" + } + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping" + ], + "task_id": 506, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "meat substitute", + "dollar_value": "between 100 and 200" + }, + "intent": "Buy the highest rated product from the meat substitute category within a budget 
between 100 and 200.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B01CTR3DLE" + ] + } + } + ] + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 507, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "Ceiling light", + "dollar_value": "above 1000" + }, + "intent": "Buy the highest rated product from the Ceiling light category within a budget above 1000.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07BVL3P1V" + ] + } + } + ] + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 508, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "NS switch pouch", + "dollar_value": "under 60" + }, + "intent": "Buy the highest rated product from the NS switch pouch category within a budget under 60.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": 
"", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07116LGP6" + ] + } + } + ] + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 509, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Buy the best rating product from \"{{category}}\" category with at least 5 reviews and the product is least expensive", + "instantiation_dict": { + "category": "Men's shoe" + }, + "intent": "Buy the best rating product from \"Men's shoe\" category with at least 5 reviews and the product is least expensive", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B01J4MM3KO" + ] + } + } + ] + }, + "intent_template_id": 216 + }, + { + "sites": [ + "shopping" + ], + "task_id": 510, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Buy the best rating product from \"{{category}}\" category with at least 5 reviews and the product is least expensive", + "instantiation_dict": { + "category": "Home Audio Speaker" + }, + "intent": "Buy the best rating product from \"Home Audio Speaker\" category with at least 5 reviews and the product is least expensive", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": 
"document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B002R5ABIW" + ] + } + } + ] + }, + "intent_template_id": 216 + }, + { + "sites": [ + "shopping" + ], + "task_id": 511, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "laundry detergent" + }, + "intent": "Add a laundry detergent to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "laundry", + "detergent" + ] + } + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 512, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "toothpaste" + }, + "intent": "Add a toothpaste to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "toothpaste" + ] + } + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 513, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "chair" 
+ }, + "intent": "Add a chair to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "chair" + ] + } + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 514, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "white desk" + }, + "intent": "Add a white desk to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "white", + "desk" + ] + } + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 515, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "white computer desk" + }, + "intent": "Add a white computer desk to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "white", + "computer", + "desk" + ] + } + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + 
], + "task_id": 516, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/elmwood-inn-fine-teas-orange-vanilla-caffeine-free-fruit-infusion-16-ounce-pouch.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "Elmwood Inn Fine Teas, Orange Vanilla Caffeine-free Fruit Infusion, 16-Ounce Pouch" + ] + } + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 517, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/skinit-decal-gaming-skin-compatible-with-xbox-one-s-console-and-controller-bundle-officially-licensed-nfl-baltimore-ravens-design.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "Skinit Decal Gaming Skin Compatible with Xbox One S Console and Controller Bundle - Officially Licensed NFL Baltimore Ravens Design" + ] + } + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 518, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"http://localhost:28082/sceptre-e195bd-srr-19-inch-720p-led-tv-true-black-2017.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "Sceptre E195BD-SRR 19-Inch 720P LED TV, True Black (2017)" + ] + } + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 519, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/iphone-13-pro-max-case-neon-turtle-iphone-13-pro-max-cases-tempered-glass-back-soft-silicone-tpu-shock-protective-case-for-apple-iphone-13-pro-max.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "iPhone 13 Pro Max Case, Neon Turtle iPhone 13 Pro Max Cases, Tempered Glass Back+Soft Silicone TPU Shock Protective Case for Apple iPhone 13 Pro Max" + ] + } + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 520, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"http://localhost:28082/magnetic-metal-stainless-steel-d-pads-kits-directional-pad-replacement-parts-for-xbox-one-elite-controller-elite-series-2-xbox-one-xbox-one-s-x-controller.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "Magnetic Metal Stainless Steel D-pads Kits Directional Pad Replacement Parts for Xbox One Elite Controller, Elite Series 2, Xbox One, Xbox One S/X Controller" + ] + } + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 521, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Subscribe to the newsletter of OneStopMarket", + "instantiation_dict": {}, + "intent": "Subscribe to the newsletter of OneStopMarket", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/newsletter/manage/", + "locator": "document.querySelector('[title=\"General Subscription\"').checked.toString()", + "required_contents": { + "exact_match": "true" + } + } + ] + }, + "intent_template_id": 199 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 522, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "all repos from facebook" + }, + "intent": "Fork all repos from facebook.", + "require_reset": false, + "eval": 
{ + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/dashboard/projects", + "locator": "document.querySelector('[data-qa-selector=\"projects_list\"').outerText", + "required_contents": { + "must_include": [ + "create-react-app", + "buck" + ] + } + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 523, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "five" + }, + "intent": "Star the top five most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/users/byteblaze/starred", + "locator": "", + "required_contents": { + "must_include": [ + "AndroidSlidingUpPanel", + "create-react-app", + "ffmpeg-python", + "PHP_XLSXWriter", + "AndroidAsync" + ] + } + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 524, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "eight" + }, + "intent": "Star the top eight most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/users/byteblaze/starred", + "locator": "", + "required_contents": { + "must_include": [ + "AndroidSlidingUpPanel", + "create-react-app", + "ffmpeg-python", + "PHP_XLSXWriter", + "AndroidAsync", + "Pytorch-GAN", + "administrate", + 
"keycloak" + ] + } + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 525, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "four" + }, + "intent": "Star the top four most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/users/byteblaze/starred", + "locator": "", + "required_contents": { + "must_include": [ + "AndroidSlidingUpPanel", + "create-react-app", + "ffmpeg-python", + "PHP_XLSXWriter" + ] + } + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 526, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "three" + }, + "intent": "Star the top three most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/users/byteblaze/starred", + "locator": "", + "required_contents": { + "must_include": [ + "AndroidSlidingUpPanel", + "create-react-app", + "ffmpeg-python" + ] + } + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 527, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "one" + }, + "intent": "Star the top one most stared repos in 
Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/users/byteblaze/starred", + "locator": "", + "required_contents": { + "must_include": [ + "AndroidSlidingUpPanel" + ] + } + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "shopping" + ], + "task_id": 528, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "phone screen protector", + "time": "March 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the phone screen protector I bought March 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "000000180", + "12.99" + ] + } + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "shopping" + ], + "task_id": 529, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. 
The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "bluetooth speaker", + "time": "Feb 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the bluetooth speaker I bought Feb 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "000000148", + "169.95" + ] + } + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "shopping" + ], + "task_id": 530, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "kitchen organizer", + "time": "around Feb 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the kitchen organizer I bought around Feb 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. 
Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "000000161", + "68.88" + ] + } + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "shopping" + ], + "task_id": 531, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "phone case", + "time": "March 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the phone case I bought March 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "000000180", + "$12.99" + ] + } + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "shopping" + ], + "task_id": 532, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. 
The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "PS3 remote controller", + "time": "early 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the PS3 remote controller I bought early 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "000000180", + "1.63" + ] + } + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 533, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "convexegg", + "yjlou" + ] + }, + "intent": "Follow ['convexegg', 'yjlou'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": { + "must_include": [ + "@convexegg", + "@yjlou" + ] + } + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 534, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "Jakub Klinkovsk\u00fd", + "Koushik", + 
"Vinta Chen" + ] + }, + "intent": "Follow ['Jakub Klinkovsk\u00fd', 'Koushik', 'Vinta Chen'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@koush", + "@vinta" + ] + } + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 535, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "Jakub K", + "ghost", + "Beno\u00eet Blanchon" + ] + }, + "intent": "Follow ['Jakub K', 'ghost', 'Beno\u00eet Blanchon'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@ghost", + "@bblanchon" + ] + } + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 536, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "ghost", + "R1kk3r", + "Abishek" + ] + }, + "intent": "Follow ['ghost', 'R1kk3r', 'Abishek'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/users/byteblaze/following", + "locator": 
"document.querySelector('.user-profile').outerText", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@R1kk3r", + "@abisubramanya27" + ] + } + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 537, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "Jakub Klinkovsk", + "convexegg", + "Vinta Chen", + "yjlou", + "Abishek S" + ] + }, + "intent": "Follow ['Jakub Klinkovsk', 'convexegg', 'Vinta Chen', 'yjlou', 'Abishek S'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@convexegg", + "@vinta", + "@yjlou", + "@abisubramanya27" + ] + } + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 538, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "299", + "address": "456 Oak Avenue, Apartment 5B, New York, NY, 10001" + }, + "intent": "Modify the address of order #299 to 456 Oak Avenue, Apartment 5B, New York, NY, 10001", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/299", + "locator": "", + "required_contents": { + "must_include": [ + "456 Oak Avenue", + "Apartment 5B", + "New York", + "10001" + ] + 
} + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 539, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "65", + "address": "789 Pine Lane, San Francisco, CA, 94102" + }, + "intent": "Modify the address of order #65 to 789 Pine Lane, San Francisco, CA, 94102", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/65", + "locator": "", + "required_contents": { + "must_include": [ + "789 Pine Lane", + "San Francisco", + "California", + "94102" + ] + } + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 540, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "301", + "address": "321 Birch Boulevard, Suite 200, Dallas, TX, 75201" + }, + "intent": "Modify the address of order #301 to 321 Birch Boulevard, Suite 200, Dallas, TX, 75201", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/301", + "locator": "", + "required_contents": { + "must_include": [ + "321 Birch Boulevard", + "Suite 200", + "Dallas", + "Texas", + "75201" + ] + } + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 541, + "require_login": true, + "storage_state": 
"./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "125", + "address": "654 Elm Drive, Apartment 12, Miami, FL, 33101" + }, + "intent": "Modify the address of order #125 to 654 Elm Drive, Apartment 12, Miami, FL, 33101", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/125", + "locator": "", + "required_contents": { + "must_include": [ + "654 Elm Drive", + "Apartment 12", + "Miami", + "Florida", + "33101" + ] + } + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 542, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "300", + "address": "987 Cedar Court, Los Angeles, CA, 90012" + }, + "intent": "Modify the address of order #300 to 987 Cedar Court, Los Angeles, CA, 90012", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/300", + "locator": "", + "required_contents": { + "must_include": [ + "987 Cedar Court", + "Los Angeles", + "California", + "90012" + ] + } + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 543, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Update the product description of {{product}} to highlight the 
real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Bella Tank" + }, + "intent": "Update the product description of Bella Tank to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/../bella-tank.html", + "locator": "document.querySelector('.data.item.content').outerText + (document.querySelector('.product.attribute.overview [itemprop=\"description\"]')?.outerText || '')", + "required_contents": { + "must_include": [ + "Good choice for working out and stylin' enough to wear when I'm hanging with friends on hot days", + "Also washes really well", + "Always a sweet n sporty look for the gym", + "Keeps me cool and the seams don't rub up against me like some of my other tanks" + ] + } + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 544, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Update the description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Selena Yoga Hoodie" + }, + "intent": "Update the description of Selena Yoga Hoodie to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/../selene-yoga-hoodie.html", + "locator": "document.querySelector('.data.item.content').outerText + (document.querySelector('.product.attribute.overview [itemprop=\"description\"]')?.outerText || '')", + "required_contents": { + "must_include": [ + "I was super cold and it did the job.", 
+ "The sleeves are definitely thicker than you realize, which is a good thing", + "really quite substantial", + "planning on buying another one of these in another color", + "the best hoodie ive ever owned" + ] + } + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 545, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Update the description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Radiant Tee" + }, + "intent": "Update the description of Radiant Tee to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/../radiant-tee.html", + "locator": "document.querySelector('.data.item.content').outerText + (document.querySelector('.product.attribute.overview [itemprop=\"description\"]')?.outerText || '')", + "required_contents": { + "must_include": [ + "What I rally love here is that it does the job of keeping me cool and dry", + "I'm a big guy and sweat A LOT", + "Even after a day of gulf, I'm still dry and comfortable", + "What a versatile shirt", + "Not only does it feel very soft compared to my old worn out polos, but it also does the job promised", + "I like going out after my game for drinks so I look good then too and don't need to change into something fresh" + ] + } + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 546, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Update the description of {{product}} to highlight the real user positive 
reviews by quoting the comments", + "instantiation_dict": { + "product": "Lucia Cross-Fit Bra" + }, + "intent": "Update the description of Lucia Cross-Fit Bra to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/../affirm-water-bottle.html", + "locator": "document.querySelector('.data.item.content').outerText + (document.querySelector('.product.attribute.overview [itemprop=\"description\"]')?.outerText || '')", + "required_contents": { + "must_include": [ + "Wide mouth opening makes it easy to clean" + ] + } + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 547, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Add a new {{option}} option {{value}} to the {{base_setting}} of {{product}}", + "instantiation_dict": { + "option": "color", + "value": "brown", + "base_setting": "size S", + "product": "Phoebe Zipper Sweatshirt" + }, + "intent": "Add a new color option brown to the size S of Phoebe Zipper Sweatshirt", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1130/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": { + "must_include": [ + "Phoebe Zipper Sweatshirt-S-Brown" + ] + } + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 548, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Add a new 
{{option}} {{value}} to {{base_setting}} of {{product}}", + "instantiation_dict": { + "option": "color", + "value": "blue", + "base_setting": "size S and M", + "product": "Frankie Sweatshirt" + }, + "intent": "Add a new color blue to size S and M of Frankie Sweatshirt", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/110/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": { + "must_include": [ + "Sweatshirt-M-Blue", + "Sweatshirt-S-Blue" + ] + } + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 549, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Add a new {{option}} {{value}} to {{base_setting}} {{product}}", + "instantiation_dict": { + "option": "size", + "value": "XXXL", + "base_setting": "green", + "product": "Minerva LumaTech V-Tee" + }, + "intent": "Add a new size XXXL to green Minerva LumaTech V-Tee", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1492/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": { + "must_include": [ + "V-Tee-XXXL-Green" + ] + } + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 550, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Add a new {{option}} {{value}} to {{base_setting}} {{product}}", + "instantiation_dict": { + "option": "size", + "value": 
"XXS", + "base_setting": "blue and purple", + "product": "Nona Fitness Tank" + }, + "intent": "Add a new size XXS to blue and purple Nona Fitness Tank", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1732/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": { + "must_include": [ + "Tank-XXS-Blue", + "Tank-XXS-Purple" + ] + } + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 551, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Add new {{option}} {{value}} to {{base_setting}} of {{product}}", + "instantiation_dict": { + "option": "size", + "value": "30 and 31", + "base_setting": "all color variants", + "product": "Diana Tights" + }, + "intent": "Add new size 30 and 31 to all color variants of Diana Tights", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1854/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": { + "must_include": [ + "Tights-30-Blue", + "Tights-30-Black", + "Tights-30-Orange", + "Tights-31-Blue", + "Tights-31-Black", + "Tights-31-Orange" + ] + } + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 552, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a folder named {{directory}} in {{gitlab_repo}} repo. 
Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the {{subreddit}}?", + "instantiation_dict": { + "directory": "real_space", + "subreddit": "space", + "gitlab_repo": "gimmiethat.space" + }, + "intent": "Create a folder named real_space in gimmiethat.space repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the space?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/gimmiethat.space/-/raw/main/real_space/urls.txt", + "locator": "", + "required_contents": { + "must_include": [ + "http://localhost:28080/f/space/134164/scientists-erupt-at-nasa-gutting-funding-for-crucial-venus", + "http://localhost:28080/f/space/134163/virgin-orbit-fails-to-secure-funding-will-cease-operations", + "http://localhost:28080/f/space/134162/nasa-to-name-artemis-2-crew-next-week-the-first-moon", + "http://localhost:28080/f/space/134161/bent-light-in-deep-space-reveals-one-of-the-biggest-black", + "http://localhost:28080/f/space/134160/seti-s-new-machine-learning-algorithm-works-like-google-s" + ] + } + } + ] + }, + "intent_template_id": 84 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 553, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a folder named {{directory}} in {{gitlab_repo}} repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the {{subreddit}}?", + "instantiation_dict": { + "directory": "news", + "gitlab_repo": "gimmiethat.space", + "subreddit": "news related subreddits" + }, + "intent": "Create a folder named news in gimmiethat.space repo. 
Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the news related subreddits?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/gimmiethat.space/-/raw/main/news/urls.txt", + "locator": "", + "required_contents": { + "must_include": [ + "http://localhost:28080/f/news/129905/ohio-man-charged-for-using-molotov-cocktails-to-attack", + "http://localhost:28080/f/news/129904/in-a-loss-for-fox-news-judge-allows-dominion-s-defamation", + "http://localhost:28080/f/news/129903/theater-group-sues-to-block-tennessee-s-new-anti-drag-law", + "http://localhost:28080/f/news/129902/andrew-tate-released-from-jail-in-romania-and-placed-under", + "http://localhost:28080/f/news/129901/rare-high-risk-storm-alert-issued-for-parts-of-midwest-and" + ] + } + } + ] + }, + "intent_template_id": 84 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 554, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a folder named {{directory}} in {{gitlab_repo}} repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the {{subreddit}}?", + "instantiation_dict": { + "directory": "moive_space", + "gitlab_repo": "gimmiethat.space", + "subreddit": "movies" + }, + "intent": "Create a folder named moive_space in gimmiethat.space repo. 
Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the movies?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/gimmiethat.space/-/raw/main/moive_space/urls.txt", + "locator": "", + "required_contents": { + "must_include": [ + "http://localhost:28080/f/movies/128825/scenes-in-film-that-feel-off-or-wrong-in-some-way-and-make", + "http://localhost:28080/f/movies/128824/disney-s-live-action-lilo-amp-stitch-movie-finds-its-lilo-in", + "http://localhost:28080/f/movies/128823/fantastic-four-movie-gets-new-writer-with-avatar-the-way-of", + "http://localhost:28080/f/movies/128822/can-someone-explain-what-made-steven-seagal-so-appealing-for", + "http://localhost:28080/f/movies/128821/ban-on-fetish-sex-depictions-in-film-should-end-australia" + ] + } + } + ] + }, + "intent_template_id": 84 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 555, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a folder named {{directory}} in {{gitlab_repo}} repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the {{subreddit}}?", + "instantiation_dict": { + "directory": "funny_pic", + "gitlab_repo": "gimmiethat.space", + "subreddit": "memes" + }, + "intent": "Create a folder named funny_pic in gimmiethat.space repo. 
Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the memes?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/gimmiethat.space/-/raw/main/funny_pic/urls.txt", + "locator": "", + "required_contents": { + "must_include": [ + "http://localhost:28080/f/memes/127991/it-do-be-like-that-tho", + "http://localhost:28080/f/memes/127990/thank-you-memers-this-wouldn-t-be-possible-without-you", + "http://localhost:28080/f/memes/127989/if-you-have-no-other-choice", + "http://localhost:28080/f/memes/127988/yes-yes-yes", + "http://localhost:28080/f/memes/127987/shagadelic-baby" + ] + } + } + ] + }, + "intent_template_id": 84 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 556, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_honest_fans", + "topics": "movies directed by Christopher Nolan" + }, + "intent": "Create a repo named nolan_honest_fans with movies directed by Christopher Nolan in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "Following", + "Memento", + "Insomnia", + "Batman Begins", + "The Prestige", + "The Dark Knight", + "Inception", + "The Dark Knight Rises", + "Interstellar", + "Dunkirk", + "Tenet", + "Oppenheimer" + ] + } + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 557, + "require_login": true, + "storage_state": 
"./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_old_fans", + "topics": "movies directed by Christopher Nolan before 2010" + }, + "intent": "Create a repo named nolan_old_fans with movies directed by Christopher Nolan before 2010 in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/nolan_old_fans/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "Following", + "Memento", + "Insomnia", + "Batman Begins", + "The Prestige", + "The Dark Knight" + ] + } + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 558, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_young_fans", + "topics": "movies directed by Christopher Nolan after 2010" + }, + "intent": "Create a repo named nolan_young_fans with movies directed by Christopher Nolan after 2010 in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/nolan_young_fans/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "Inception", + "The Dark Knight Rises", + "Interstellar", + "Dunkirk", + "Tenet", + "Oppenheimer" + ] + } + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 559, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + 
"start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_followers", + "topics": "career timeline of Christopher Nolan" + }, + "intent": "Create a repo named nolan_followers with career timeline of Christopher Nolan in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/nolan_followers/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "1993\u20132003: Early career and breakthrough", + "2003\u20132013: Widespread recognition", + "2014\u20132019: Established Hollywood auteur", + "2020\u2013present" + ] + } + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 560, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_academy_awards", + "topics": "movies that won Academy Awards by Christopher Nolan" + }, + "intent": "Create a repo named nolan_academy_awards with movies that won Academy Awards by Christopher Nolan in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/nolan_academy_awards/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "The Dark Knight", + "Inception", + "Interstellar", + "Dunkirk", + "Tenet" + ] + } + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 561, + "require_login": true, + "storage_state": 
"./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "bafta_awards_nolan", + "topics": "movies that are nominated BAFTA Awards by Christopher Nolan" + }, + "intent": "Create a repo named bafta_awards_nolan with movies that are nominated BAFTA Awards by Christopher Nolan in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/bafta_awards_nolan/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "Batman Begins", + "The Dark Knight", + "Inception", + "The Dark Knight Rises", + "Interstellar", + "Dunkirk", + "Tenet" + ] + } + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 562, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "Awesome_DIY_ideas", + "num": 6 + }, + "intent": "create a repository named Awesome_DIY_ideas that includes a README file with the links to the most active 6 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/Awesome_DIY_ideas/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "http://localhost:28080/f/DIY/118903/separate-glued-plastic-parts", + "http://localhost:28080/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess", + 
"http://localhost:28080/f/DIY/118935/basement-bulkhead-soffit-wall-framing", + "http://localhost:28080/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit", + "http://localhost:28080/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches", + "http://localhost:28080/f/DIY/118931/afci-outlet-question" + ] + } + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 563, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "fun_thing_to_do", + "num": 5 + }, + "intent": "create a repository named fun_thing_to_do that includes a README file with the links to the most active 5 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/fun_thing_to_do/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "http://localhost:28080/f/DIY/118903/separate-glued-plastic-parts", + "http://localhost:28080/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess", + "http://localhost:28080/f/DIY/118935/basement-bulkhead-soffit-wall-framing", + "http://localhost:28080/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit", + "http://localhost:28080/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches" + ] + } + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 564, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "create a repository named {{name}} that 
includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "live_a_life", + "num": 3 + }, + "intent": "create a repository named live_a_life that includes a README file with the links to the most active 3 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/live_a_life/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "http://localhost:28080/f/DIY/118903/separate-glued-plastic-parts", + "http://localhost:28080/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess", + "http://localhost:28080/f/DIY/118935/basement-bulkhead-soffit-wall-framing" + ] + } + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 565, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "TODO", + "num": 10 + }, + "intent": "create a repository named TODO that includes a README file with the links to the most active 10 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/TODO/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "http://localhost:28080/f/DIY/118903/separate-glued-plastic-parts", + "http://localhost:28080/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess", + "http://localhost:28080/f/DIY/118935/basement-bulkhead-soffit-wall-framing", + 
"http://localhost:28080/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit", + "http://localhost:28080/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches", + "http://localhost:28080/f/DIY/118931/afci-outlet-question", + "http://localhost:28080/f/DIY/118824/teflon-tape-to-attach-washing-machine-drain-hose-to-pipe", + "http://localhost:28080/f/DIY/118866/paver-base-for-shed", + "http://localhost:28080/f/DIY/118820/ways-to-locate-our-buried-electrical-service", + "http://localhost:28080/f/DIY/118836/how-to-eliminate-transitions-for-disability-mobility-reasons" + ] + } + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 566, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "Do it myself", + "num": 8 + }, + "intent": "create a repository named Do it myself that includes a README file with the links to the most active 8 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/Do-it-myself/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "http://localhost:28080/f/DIY/118903/separate-glued-plastic-parts", + "http://localhost:28080/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess", + "http://localhost:28080/f/DIY/118935/basement-bulkhead-soffit-wall-framing", + "http://localhost:28080/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit", + "http://localhost:28080/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches", + "http://localhost:28080/f/DIY/118931/afci-outlet-question", + 
"http://localhost:28080/f/DIY/118824/teflon-tape-to-attach-washing-machine-drain-hose-to-pipe", + "http://localhost:28080/f/DIY/118866/paver-base-for-shed" + ] + } + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 567, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo", + "instantiation_dict": { + "collaborator_account_list": "Jakub Klinkovsk\u00fd and Beno\u00eet Blanchon", + "repo": "gimmiethat.space" + }, + "intent": "Invite Jakub Klinkovsk\u00fd and Beno\u00eet Blanchon as collaborator to gimmiethat.space repo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/gimmiethat.space/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@bblanchon" + ] + } + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 568, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo", + "instantiation_dict": { + "collaborator_account_list": "Abishek and Vinta", + "repo": "a11yproject.com" + }, + "intent": "Invite Abishek and Vinta as collaborator to a11yproject.com repo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/a11yproject/a11yproject.com/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@abisubramanya27", + "@vinta" + ] + } + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + 
"gitlab" + ], + "task_id": 569, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo", + "instantiation_dict": { + "collaborator_account_list": "Beno\u00eet and Abishek", + "repo": "my HTML5 markup extention" + }, + "intent": "Invite Beno\u00eet and Abishek as collaborator to my HTML5 markup extention repo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/accessible-html-content-patterns/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@bblanchon", + "@abisubramanya27" + ] + } + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 570, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo", + "instantiation_dict": { + "collaborator_account_list": "Jakub K, Alex Dills, Alex Hutnik and Beno\u00eet Blanchon", + "repo": "my time tracking tool project" + }, + "intent": "Invite Jakub K, Alex Dills, Alex Hutnik and Beno\u00eet Blanchon as collaborator to my time tracking tool project repo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/timeit/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@V13Axel", + "@alexhutnik", + "@bblanchon" + ] + } + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + "shopping" + ], + "task_id": 571, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", 
+ "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "231 Willow Way, Suite 100, Chicago, IL, 60601" + }, + "intent": "I recently moved, my address is 231 Willow Way, Suite 100, Chicago, IL, 60601, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/customer/address", + "locator": "document.querySelector(\".box.box-address-billing > .box-content\").outerText", + "required_contents": { + "must_include": [ + "231 Willow Way", + "Suite 100", + "Chicago, Illinois, 60601" + ] + } + }, + { + "url": "http://localhost:28082/customer/address", + "locator": "document.querySelector(\".box.box-address-shipping > .box-content\").outerText", + "required_contents": { + "must_include": [ + "231 Willow Way", + "Suite 100", + "Chicago, Illinois, 60601" + ] + } + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "shopping" + ], + "task_id": 572, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "654 Aspen Road, House #3, Boston, MA, 02110" + }, + "intent": "I recently moved, my address is 654 Aspen Road, House #3, Boston, MA, 02110, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/customer/address", + "locator": "document.querySelector(\".box.box-address-billing > 
.box-content\").outerText", + "required_contents": { + "must_include": [ + "654 Aspen Road", + "House #3", + "Boston, Massachusetts, 02110" + ] + } + }, + { + "url": "http://localhost:28082/customer/address", + "locator": "document.querySelector(\".box.box-address-shipping > .box-content\").outerText", + "required_contents": { + "must_include": [ + "654 Aspen Road", + "House #3", + "Boston, Massachusetts, 02110" + ] + } + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "shopping" + ], + "task_id": 573, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "987 Sycamore Circle, Philadelphia, PA, 19102" + }, + "intent": "I recently moved, my address is 987 Sycamore Circle, Philadelphia, PA, 19102, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/customer/address", + "locator": "document.querySelector(\".box.box-address-shipping > .box-content\").outerText", + "required_contents": { + "must_include": [ + "987 Sycamore Circle", + "Philadelphia, Pennsylvania, 19102" + ] + } + }, + { + "url": "http://localhost:28082/customer/address", + "locator": "document.querySelector(\".box.box-address-billing > .box-content\").outerText", + "required_contents": { + "must_include": [ + "987 Sycamore Circle", + "Philadelphia, Pennsylvania, 19102" + ] + } + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "shopping" + ], + "task_id": 574, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I recently moved, my address 
is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "111 Magnolia Path, Atlanta, GA, 30303" + }, + "intent": "I recently moved, my address is 111 Magnolia Path, Atlanta, GA, 30303, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/customer/address", + "locator": "document.querySelector(\".box.box-address-shipping > .box-content\").outerText", + "required_contents": { + "must_include": [ + "111 Magnolia Path", + "Atlanta, Georgia, 30303" + ] + } + }, + { + "url": "http://localhost:28082/customer/address", + "locator": "document.querySelector(\".box.box-address-billing > .box-content\").outerText", + "required_contents": { + "must_include": [ + "111 Magnolia Path", + "Atlanta, Georgia, 30303" + ] + } + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "shopping" + ], + "task_id": 575, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "222 Redwood Rise, Suite 300, Seattle, WA, 98101" + }, + "intent": "I recently moved, my address is 222 Redwood Rise, Suite 300, Seattle, WA, 98101, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/customer/address", + "locator": "document.querySelector(\".box.box-address-shipping > .box-content\").outerText", + "required_contents": { + "must_include": [ + "222 Redwood Rise", + "Suite 300", + "Seattle, Washington, 98101" + ] + } + }, + { + 
"url": "http://localhost:28082/customer/address", + "locator": "document.querySelector(\".box.box-address-billing > .box-content\").outerText", + "required_contents": { + "must_include": [ + "222 Redwood Rise", + "Suite 300", + "Seattle, Washington, 98101" + ] + } + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 576, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Add the following users to repo {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "a11y-webring.club", + "role": "developer", + "user_list": [ + "abisubramanya27", + "lahwaacz" + ] + }, + "intent": "Add the following users to repo a11y-webring.club as developer: ['abisubramanya27', 'lahwaacz']", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/a11y-webring.club/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'abisubramanya27')", + "required_contents": { + "must_include": [ + "Developer" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/a11y-webring.club/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'lahwaacz')", + "required_contents": { + "must_include": [ + "Developer" + ] + } + } + ] + }, + "intent_template_id": 351 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 577, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Add the following users to my {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "GitHub timeline item management extension", + "role": "maintainer", + "user_list": [ + "abisubramanya27", + "lahwaacz" + ] + }, + "intent": "Add the following users to my GitHub 
timeline item management extension as maintainer: ['abisubramanya27', 'lahwaacz']", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/remove-board-movement-events-from-the-github-issue-timeline/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'abisubramanya27')", + "required_contents": { + "must_include": [ + "Maintainer" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/remove-board-movement-events-from-the-github-issue-timeline/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'lahwaacz')", + "required_contents": { + "must_include": [ + "Maintainer" + ] + } + } + ] + }, + "intent_template_id": 351 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 578, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Add the following users to repo {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "millennials-to-snake-people", + "role": "reporter", + "user_list": [ + "yjlou", + "a11yproject" + ] + }, + "intent": "Add the following users to repo millennials-to-snake-people as reporter: ['yjlou', 'a11yproject']", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/millennials-to-snake-people/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'yjlou')", + "required_contents": { + "must_include": [ + "Reporter" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/millennials-to-snake-people/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'a11yproject')", + "required_contents": { + "must_include": [ + "Reporter" + ] + } + } + 
] + }, + "intent_template_id": 351 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 579, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Add the following users to my {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "time tracking tool", + "role": "guest", + "user_list": [ + "yjlou" + ] + }, + "intent": "Add the following users to my time tracking tool as guest: ['yjlou']", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/timeit/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'yjlou')", + "required_contents": { + "must_include": [ + "Guest" + ] + } + } + ] + }, + "intent_template_id": 351 + }, + { + "sites": [ + "reddit" + ], + "task_id": 580, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "sci_fi", + "description": "A wild place for sci-fi enthusiasts", + "sidebar_list": [ + "New", + "Classic", + "Movies", + "Post my novel", + "Random" + ] + }, + "intent": "Create a new forum named sci_fi, with a description of A wild place for sci-fi enthusiasts, and include ['New', 'Classic', 'Movies', 'Post my novel', 'Random'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/sci_fi/edit", + "locator": "document.querySelector(\"#forum_description\").value", + "required_contents": { + "must_include": [ + "A wild place for sci-fi enthusiasts" + ] + 
} + }, + { + "url": "http://localhost:28080/f/sci_fi/edit", + "locator": "document.querySelector(\"#forum_sidebar\").value", + "required_contents": { + "must_include": [ + "New", + "Classic", + "Movies", + "Post my novel", + "Random" + ] + } + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 581, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "cmu_lti", + "description": "Language Technologies Institute at Carnegie Mellon University", + "sidebar_list": [ + "announcement", + "paper", + "alumni" + ] + }, + "intent": "Create a new forum named cmu_lti, with a description of Language Technologies Institute at Carnegie Mellon University, and include ['announcement', 'paper', 'alumni'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/cmu_lti/edit", + "locator": "document.querySelector(\"#forum_description\").value", + "required_contents": { + "must_include": [ + "Language Technologies Institute at Carnegie Mellon University" + ] + } + }, + { + "url": "http://localhost:28080/f/cmu_lti/edit", + "locator": "document.querySelector(\"#forum_sidebar\").value", + "required_contents": { + "must_include": [ + "announcement", + "paper", + "alumni" + ] + } + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 582, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + 
"instantiation_dict": { + "name": "Cyberpunk", + "description": "Welcome to the future", + "sidebar_list": [ + "Games", + "Books", + "Movies", + "Future" + ] + }, + "intent": "Create a new forum named Cyberpunk, with a description of Welcome to the future, and include ['Games', 'Books', 'Movies', 'Future'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/Cyberpunk/edit", + "locator": "document.querySelector(\"#forum_description\").value", + "required_contents": { + "must_include": [ + "Welcome to the future" + ] + } + }, + { + "url": "http://localhost:28080/f/Cyberpunk/edit", + "locator": "document.querySelector(\"#forum_sidebar\").value", + "required_contents": { + "must_include": [ + "Games", + "Books", + "Movies", + "Future" + ] + } + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 583, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "PlantsForCatParents", + "description": "Cat parents & plan lovers", + "sidebar_list": [ + "Cat friendly", + "Local vendors", + "Promotion", + "Toxic plants!" 
+ ] + }, + "intent": "Create a new forum named PlantsForCatParents, with a description of Cat parents & plan lovers, and include ['Cat friendly', 'Local vendors', 'Promotion', 'Toxic plants!'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/PlantsForCatParents/edit", + "locator": "document.querySelector(\"#forum_description\").value", + "required_contents": { + "must_include": [ + "Cat parents & plan lovers" + ] + } + }, + { + "url": "http://localhost:28080/f/PlantsForCatParents/edit", + "locator": "document.querySelector(\"#forum_sidebar\").value", + "required_contents": { + "must_include": [ + "Cat friendly", + "Local vendors", + "Promotion", + "Toxic plants!" + ] + } + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 584, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "Karaoke", + "description": "Place for Karaoke lovers", + "sidebar_list": [ + "devices", + "setup" + ] + }, + "intent": "Create a new forum named Karaoke, with a description of Place for Karaoke lovers, and include ['devices', 'setup'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/Karaoke", + "locator": "document.querySelector(\"#forum_description\").value", + "required_contents": { + "must_include": [ + "Place for Karaoke lovers" + ] + } + }, + { + "url": "http://localhost:28080/f/Karaoke", + "locator": "document.querySelector(\"#forum_sidebar\").value", + "required_contents": { + 
"must_include": [ + "devices", + "setup" + ] + } + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "shopping" + ], + "task_id": 585, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "floor lamp", + "num_star": 5, + "nickname": "Emma Lopez" + }, + "intent": "Rate my recent purchase of floor lamp with 5 stars, using my nickname Emma Lopez?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B00J8RZL7I')", + "required_contents": { + "must_include": [ + "100" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B00J8RZL7I')", + "required_contents": { + "must_include": [ + "Emma Lopez" + ] + } + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "shopping" + ], + "task_id": 586, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "Jiffy Corn Muffin Cornbread Mix", + "num_star": 4, + "nickname": "ShoppingEmma" + }, + "intent": "Rate my recent purchase of Jiffy Corn Muffin Cornbread Mix with 4 stars, using my nickname ShoppingEmma?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B07HZB38XH')", + "required_contents": { + "must_include": [ + "80" + ] + } + }, + { + 
"url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B07HZB38XH')", + "required_contents": { + "must_include": [ + "ShoppingEmma" + ] + } + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "shopping" + ], + "task_id": 587, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "PS3 Remote Controllers", + "num_star": 3, + "nickname": "GamingEmma" + }, + "intent": "Rate my recent purchase of PS3 Remote Controllers with 3 stars, using my nickname GamingEmma?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B0041MSF2S')", + "required_contents": { + "must_include": [ + "60" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B0041MSF2S')", + "required_contents": { + "must_include": [ + "GamingEmma" + ] + } + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "shopping" + ], + "task_id": 588, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "Foundation For Mattress With Frame Set", + "num_star": 1, + "nickname": "ShoppingEmma" + }, + "intent": "Rate my recent purchase of Foundation For Mattress With Frame Set with 1 stars, using my nickname ShoppingEmma?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + 
"locator": "func:shopping_get_sku_latest_review_rating('B07DFJ5XKH')", + "required_contents": { + "must_include": [ + "20" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B07DFJ5XKH')", + "required_contents": { + "must_include": [ + "ShoppingEmma" + ] + } + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "shopping" + ], + "task_id": 589, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "Mini Wireless Bluetooth Speaker", + "num_star": 2, + "nickname": "SimpleEmma" + }, + "intent": "Rate my recent purchase of Mini Wireless Bluetooth Speaker with 2 stars, using my nickname SimpleEmma?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B09P7BFL4H')", + "required_contents": { + "must_include": [ + "40" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B09P7BFL4H')", + "required_contents": { + "must_include": [ + "SimpleEmma" + ] + } + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 590, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/primer/design", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "event of product launch", + "start_date": "1/16/2023", + "end_date": "1/30/2023" + }, + "intent": "Create a milestone for the upcoming event of product launch starting on 1/16/2023 and ending on 1/30/2023", + 
"require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/primer/design/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": { + "must_include": [ + "product launch" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": { + "must_include": [ + "Jan 16, 2030" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": { + "must_include": [ + "Jan 30, 2030" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 591, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/primer/design", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "practice of collective code review", + "start_date": "1/16/2023", + "end_date": "in 20 days" + }, + "intent": "Create a milestone for the upcoming practice of collective code review starting on 1/16/2023 and ending on in 20 days", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/primer/design/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": { + "must_include": [ + "code review" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": { + "must_include": [ + "Jan 16, 2030" + ] + } + }, + { + "url": "last", + "locator": 
"document.querySelector('.block.due_date').outerText", + "required_contents": { + "must_include": [ + "Feb 5, 2030" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 592, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/primer/design", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "task of cleaning sensitive information", + "start_date": "2/16/2023", + "end_date": "in 20 days" + }, + "intent": "Create a milestone for the upcoming task of cleaning sensitive information starting on 2/16/2023 and ending on in 20 days", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/primer/design/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": { + "must_include": [ + "sensitive information" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": { + "must_include": [ + "Feb 16, 2030" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": { + "must_include": [ + "Mar 8, 2030" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 593, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/byteblaze/dotfiles", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "task of merging all branches to main", + 
"start_date": "March 15, 2044", + "end_date": "March 30, 2044" + }, + "intent": "Create a milestone for the upcoming task of merging all branches to main starting on March 15, 2044 and ending on March 30, 2044", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/byteblaze/dotfiles/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": { + "must_include": [ + "all branches to main" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": { + "must_include": [ + "Mar 15, 2044" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": { + "must_include": [ + "Mar 30, 2044" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 594, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/byteblaze/dotfiles", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "task of adding a new branch for zsh comprehensive support", + "start_date": "5/1/2044", + "end_date": "in 20 days" + }, + "intent": "Create a milestone for the upcoming task of adding a new branch for zsh comprehensive support starting on 5/1/2044 and ending on in 20 days", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/byteblaze/dotfiles/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": { + "must_include": 
[ + "zsh comprehensive support" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": { + "must_include": [ + "May 1, 2044" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": { + "must_include": [ + "May 21, 2044" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "reddit" + ], + "task_id": 595, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "space" + }, + "intent": "Open the thread of a trending post on the forum \"space\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/space", + "program_html": [ + { + "url": "http://localhost:28080/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": { + "must_include": [ + "space" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 596, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "books" + }, + "intent": "Open the thread of a trending post on the forum \"books\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books", + "program_html": [ + { + "url": "http://localhost:28080/", + 
"locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": { + "must_include": [ + "books" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 597, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "consoles" + }, + "intent": "Open the thread of a trending post on the forum \"consoles\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/consoles", + "program_html": [ + { + "url": "http://localhost:28080/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": { + "must_include": [ + "consoles" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 598, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "pittsburgh" + }, + "intent": "Open the thread of a trending post on the forum \"pittsburgh\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/pittsburgh", + "program_html": [ + { + "url": "http://localhost:28080/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": { + "must_include": [ + "pittsburgh" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, 
+ { + "sites": [ + "reddit" + ], + "task_id": 599, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "machine learning" + }, + "intent": "Open the thread of a trending post on the forum \"machine learning\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/MachineLearning", + "program_html": [ + { + "url": "http://localhost:28080/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": { + "must_include": [ + "MachineLearning" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 600, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "what is the recommended console to buy these days" + }, + "intent": "Post my question, \"what is the recommended console to buy these days\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/consoles", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "what is the recommended console to buy these days" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + 
"task_id": 601, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "is car necessary in NYC" + }, + "intent": "Post my question, \"is car necessary in NYC\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/nyc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "is car necessary in NYC" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + "task_id": 602, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "places for new drivers to learn driving in pittsburgh" + }, + "intent": "Post my question, \"places for new drivers to learn driving in pittsburgh\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/pittsburgh", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "places for new drivers to learn driving in pittsburgh" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + 
"reddit" + ], + "task_id": 603, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "safe and budge apartment to live in nyc" + }, + "intent": "Post my question, \"safe and budge apartment to live in nyc\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/nyc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "safe and budge apartment to live in nyc" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + "task_id": 604, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "what is the SOTA web navigation agent repo" + }, + "intent": "Post my question, \"what is the SOTA web navigation agent repo\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/deeplearning |OR| http://localhost:28080/f/MachineLearning |OR| http://localhost:28080/f/singularity", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "what is the SOTA web 
navigation agent repo" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + "task_id": 605, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "gaming consoles", + "question": "what is the recommended console to buy these days" + }, + "intent": "Find a subreddit focused on topics related to gaming consoles, and post my question, \"what is the recommended console to buy these days\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/consoles", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "what is the recommended console to buy these days" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 606, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "NYC", + "question": "is car necessary" + }, + "intent": "Find a subreddit focused on topics related to NYC, and post my question, \"is car necessary\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/nyc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": 
"document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "is car necessary" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 607, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "city Pittsburgh", + "question": "places for new drivers to learn driving" + }, + "intent": "Find a subreddit focused on topics related to city Pittsburgh, and post my question, \"places for new drivers to learn driving\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/pittsburgh", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "places for new drivers to learn driving" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 608, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "city lives in DMV area", + "question": "safe and budge apartment to live" + }, + "intent": "Find a subreddit focused on topics related to city lives in DMV area, and post my question, \"safe and budge apartment to live\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + 
"reference_url": "http://localhost:28080/f/washington |OR| http://localhost:28080/f/washingtondc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "safe and budge apartment to live" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 609, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "ML, DL, NLP", + "question": "what is the SOTA web navigation agent repo" + }, + "intent": "Find a subreddit focused on topics related to ML, DL, NLP, and post my question, \"what is the SOTA web navigation agent repo\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/deeplearning |OR| http://localhost:28080/f/MachineLearning |OR| http://localhost:28080/f/singularity", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "what is the SOTA web navigation agent repo" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 610, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "To Kill a Mockingbird by Harper Lee", + "content": "good book!" 
+ }, + "intent": "Post a review of my recent reading \"To Kill a Mockingbird by Harper Lee\" in the r/books with my comment \"good book!\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "To Kill a Mockingbird by Harper Lee", + "good book!" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 611, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "Harry Potter", + "content": "Wonderful journey" + }, + "intent": "Post a review of my recent reading \"Harry Potter\" in the r/books with my comment \"Wonderful journey\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "Harry Potter", + "Wonderful journey" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 612, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + 
"book": "big little lies", + "content": "can't stop it" + }, + "intent": "Post a review of my recent reading \"big little lies\" in the r/books with my comment \"can't stop it\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "big little lies", + "can't stop it" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 613, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "Love story", + "content": "I cried" + }, + "intent": "Post a review of my recent reading \"Love story\" in the r/books with my comment \"I cried\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "Love story", + "I cried" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 614, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "Gone with 
the wind", + "content": "It's a book with history" + }, + "intent": "Post a review of my recent reading \"Gone with the wind\" in the r/books with my comment \"It's a book with history\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "Gone with the wind", + "It's a book with history" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 615, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "Bald Eagle", + "subreddit": "earthporn" + }, + "intent": "Re-post the image of Bald Eagle in this page to earthporn subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/earthporn", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "from /f/pics" + ] + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "[...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "b02113033af32feae9ff147dbbe3764039368d67d193885bd04e65c2e6beea9c.jpg" + ] + } + } + ], + "url_note": "GOLD in 
PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 616, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "Thanksgiving turkey", + "subreddit": "funny" + }, + "intent": "Re-post the image of Thanksgiving turkey in this page to funny subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/funny", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "from /f/pics" + ] + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "[...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "2e4fa0a328e653a97a7d07046291c298ef5b4e0d0c73a287f317ca86a8e8685f.jpg" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 617, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "Firework", + "subreddit": "earthporn" + }, + "intent": "Re-post the image of Firework in this page to earthporn subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + 
"reference_answers": null, + "reference_url": "http://localhost:28080/f/earthporn", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "from /f/pics" + ] + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "[...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "92411be6af4e9ad5ccd3ccbaa01c10457bb00e704e99c58dd430de1a958307fd.jpg" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 618, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "Wife's costume", + "subreddit": "funny" + }, + "intent": "Re-post the image of Wife's costume in this page to funny subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/funny", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "from /f/pics" + ] + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "[...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + 
"6bfbf1280d28d84a9261695f0cac5a90addaaff1174807a2b381fdc159f3ed00.jpg" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 619, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "costume contest", + "subreddit": "funny" + }, + "intent": "Re-post the image of costume contest in this page to funny subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/funny", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "from /f/pics" + ] + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "[...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "bd8bc5f4c846aac4df08626faa3a34a7d47c8f3bdd92bf615a54afd939f063a7.jpg" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 620, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "deal with long-distance relationships" + }, + "intent": "Ask for advice about deal with long-distance relationships in a subreddit for relations", + "require_reset": false, + "eval": { + 
"eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "long", + "distance", + "relation" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 621, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "cheat" + }, + "intent": "Ask for advice about cheat in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "cheat" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 622, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "sexual harassment" + }, + "intent": "Ask for advice about sexual harassment in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/relationship_advice", + "program_html": [ + { + "url": 
"func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "sexual", + "harassment" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 623, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "gift for birthday" + }, + "intent": "Ask for advice about gift for birthday in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "gift", + "birthday" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 624, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "break-up remedy" + }, + "intent": "Ask for advice about break-up remedy in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "break", + "remedy" + ] + } 
+ } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 625, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "the effectiveness of online learning" + }, + "intent": "Create a discussion post about \"the effectiveness of online learning\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/machinelearning |OR| http://localhost:28080/f/deeplearning", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "your opinion", + "the effectiveness of online learning" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 626, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "Iphone 14" + }, + "intent": "Create a discussion post about \"Iphone 14\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": 
"http://localhost:28080/f/iphone", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "your opinion", + "Iphone 14" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 627, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "Harry Potter movie series" + }, + "intent": "Create a discussion post about \"Harry Potter movie series\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/movies", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "your opinion", + "Harry Potter movie series" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 628, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "long distance relationship" + }, + "intent": "Create a discussion post about \"long distance relationship\" in a relevant subreddit and ask users for their opinions 
with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "your opinion", + "long distance relationship" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 629, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "Fun thing to do in Pittsburgh" + }, + "intent": "Create a discussion post about \"Fun thing to do in Pittsburgh\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/pittsburgh", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "your opinion", + "Fun thing to do in Pittsburgh" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 630, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + 
"instantiation_dict": { + "category": "noise-cancelling headphones", + "price": "$200", + "subreddit": "r/headphones" + }, + "intent": "Ask for product recommendations for noise-cancelling headphones within a budget of $200 in r/headphones", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/headphones", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "200", + "noise-cancelling", + "headphone" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 631, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "running shoes", + "price": "$100", + "subreddit": "r/sports" + }, + "intent": "Ask for product recommendations for running shoes within a budget of $100 in r/sports", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/sports", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "100", + "running", + "shoes" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 632, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Ask for product recommendations for 
{{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "running shoes", + "price": "$500", + "subreddit": "r/sports" + }, + "intent": "Ask for product recommendations for running shoes within a budget of $500 in r/sports", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/sports", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "500", + "running", + "shoes" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 633, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "running pants", + "price": "$500", + "subreddit": "r/sports" + }, + "intent": "Ask for product recommendations for running pants within a budget of $500 in r/sports", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/sports", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "500", + "running", + "pants" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 634, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Ask for product recommendations for 
{{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "used iphone", + "price": "$1000", + "subreddit": "r/iphone" + }, + "intent": "Ask for product recommendations for used iphone within a budget of $1000 in r/iphone", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/iphone", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "1000", + "used iphone" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 635, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "noise-cancelling headphones", + "price": "$200" + }, + "intent": "Post in the most appropriate subreddit and ask for recommendations for noise-cancelling headphones products within a budget of $200", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/headphones", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "200", + "noise-cancelling", + "headphone" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 636, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + 
"geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "DIY toolkit", + "price": "$100" + }, + "intent": "Post in the most appropriate subreddit and ask for recommendations for DIY toolkit products within a budget of $100", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/DIY", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "100", + "DIY", + "toolkit" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 637, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "sony headphones", + "price": "$500" + }, + "intent": "Post in the most appropriate subreddit and ask for recommendations for sony headphones products within a budget of $500", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/headphones", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "500", + "sony headphone" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 638, + "require_login": true, + "storage_state": 
"./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "must-have product in my life", + "price": "$30" + }, + "intent": "Post in the most appropriate subreddit and ask for recommendations for must-have product in my life products within a budget of $30", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/BuyItForLife", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "30", + "must-have", + "product", + "life" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 639, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "used iphone", + "price": "$1000" + }, + "intent": "Post in the most appropriate subreddit and ask for recommendations for used iphone products within a budget of $1000", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/iphone", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "1000", + "used iphone" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 
6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 640, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "book reading", + "date": "March 15th", + "subreddit": "r/books" + }, + "intent": "Post a notice on a virtual meetup for book reading enthusiasts on March 15th in the r/books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "book reading", + "March 15th", + "virtual meetup" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 641, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "Harry Potter", + "date": "July 8th", + "subreddit": "books" + }, + "intent": "Post a notice on a virtual meetup for Harry Potter enthusiasts on July 8th in the books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "Harry Potter", + "July 8th", + 
"virtual meetup" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 642, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "Big little lies", + "date": "Sep 10th", + "subreddit": "books" + }, + "intent": "Post a notice on a virtual meetup for Big little lies enthusiasts on Sep 10th in the books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "Big little lies", + "Sep 10th", + "virtual meetup" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 643, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "racing cars", + "date": "Oct 21st", + "subreddit": "nyc" + }, + "intent": "Post a notice on a virtual meetup for racing cars enthusiasts on Oct 21st in the nyc subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/nyc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", 
+ "required_contents": { + "must_include": [ + "racing cars", + "Oct 21st", + "virtual meetup" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 644, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "Tears of Kingdom", + "date": "Dec 15th", + "subreddit": "games" + }, + "intent": "Post a notice on a virtual meetup for Tears of Kingdom enthusiasts on Dec 15th in the games subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/games", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "Tears of Kingdom", + "Dec 15th", + "virtual meetup" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 645, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post in {{subreddit}} subreddit about what could machine learning help the correpong field.", + "instantiation_dict": { + "subreddit": "books" + }, + "intent": "Post in books subreddit about what could machine learning help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": 
"document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "machine learning", + "help" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 646, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post in {{subreddit}} subreddit about what could midjourney help the correpong field.", + "instantiation_dict": { + "subreddit": "DIY" + }, + "intent": "Post in DIY subreddit about what could midjourney help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/diy", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "midjourney", + "help" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 647, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post in {{subreddit}} forum about what could open-source LLMs help the correpong field.", + "instantiation_dict": { + "subreddit": "technology" + }, + "intent": "Post in technology forum about what could open-source LLMs help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/technology", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "open-source LLMs", + "help" + ] 
+ } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 648, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post in {{subreddit}} forum about what could large language models help the correpong field.", + "instantiation_dict": { + "subreddit": "dataisbeautiful" + }, + "intent": "Post in dataisbeautiful forum about what could large language models help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/dataisbeautiful", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "large language models", + "help" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 649, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post in {{subreddit}} subreddit about what could diffusion model help the correpong field.", + "instantiation_dict": { + "subreddit": "history" + }, + "intent": "Post in history subreddit about what could diffusion model help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/history", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "diffusion model", + "help" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + 
{ + "sites": [ + "reddit" + ], + "task_id": 650, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080/f/books/59421/friendly-reminder-bookshop-org-exists", + "geolocation": null, + "intent_template": "Reply to {{position_description}} with my comment \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the post", + "content_description": "I am a big fan of the bookorg" + }, + "intent": "Reply to the post with my comment \"I am a big fan of the bookorg\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books/59421/friendly-reminder-bookshop-org-exists", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.comment__body').outerText", + "required_contents": { + "exact_match": "I am a big fan of the bookorg" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 23 + }, + { + "sites": [ + "reddit" + ], + "task_id": 651, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080/f/pittsburgh/45899/driving-in-pittsburgh-summed-up-by-one-traffic-sign", + "geolocation": null, + "intent_template": "Reply to {{position_description}} with my comment \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the post", + "content_description": "Yeah, pittsburgh traffic, you know..." 
+ }, + "intent": "Reply to the post with my comment \"Yeah, pittsburgh traffic, you know...\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/pittsburgh/45899/driving-in-pittsburgh-summed-up-by-one-traffic-sign", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.comment__body').outerText", + "required_contents": { + "exact_match": "Yeah, pittsburgh traffic, you know..." + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 23 + }, + { + "sites": [ + "reddit" + ], + "task_id": 652, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080/f/technology/134852/ai-experts-disown-musk-backed-campaign-citing-their-research", + "geolocation": null, + "intent_template": "Reply to {{position_description}} with my comment \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the post", + "content_description": "???" + }, + "intent": "Reply to the post with my comment \"???\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/technology/134852/ai-experts-disown-musk-backed-campaign-citing-their-research", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.comment__body').outerText", + "required_contents": { + "exact_match": "???" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 23 + }, + { + "sites": [ + "shopping" + ], + "task_id": 653, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. 
Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "phone screen protector", + "order_id": "000000180" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the phone screen protector I bought, stating that it broke after just three days of use. Also, ensure to include the order number #000000180 and the product SKU. Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "000000180", + "B087QJN9W1" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "shopping" + ], + "task_id": 654, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "bluetooth speaker", + "order_id": "161" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the bluetooth speaker I bought, stating that it broke after just three days of use. Also, ensure to include the order number #161 and the product SKU. 
Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "161", + "B09P7BFL4H" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "shopping" + ], + "task_id": 655, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "iphone case", + "order_id": "180" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the iphone case I bought, stating that it broke after just three days of use. Also, ensure to include the order number #180 and the product SKU. 
Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "180", + "B087QJN9W1" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "shopping" + ], + "task_id": 656, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "remote controller", + "order_id": "180" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the remote controller I bought, stating that it broke after just three days of use. Also, ensure to include the order number #180 and the product SKU. 
Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "180", + "B0041MSF2S" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "shopping" + ], + "task_id": 657, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "speaker", + "order_id": "148" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the speaker I bought, stating that it broke after just three days of use. Also, ensure to include the order number #148 and the product SKU. 
Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "broke after three days of use", + "148", + "B003FVW3VA" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 658, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "a11yproject", + "issue": "401 bad gateway", + "account": "Roshanjossey", + "due": "the end of 2030" + }, + "intent": "Create an issue in a11yproject repo with title \"401 bad gateway\". Assign the issue to Roshanjossey. 
Set due date to be the end of 2030", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[data-qa-selector=\"title_content\"]').outerText", + "required_contents": { + "exact_match": "401 bad gateway" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": { + "must_include": [ + "Dec 31, 2030" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": { + "must_include": [ + "Roshan Jossey" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 659, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "empathy-prompts", + "issue": "Integrating LLMs for better prompts", + "account": "Roshanjossey", + "due": "the beginning of Q2 2033" + }, + "intent": "Create an issue in empathy-prompts repo with title \"Integrating LLMs for better prompts\". Assign the issue to Roshanjossey. 
Set due date to be the beginning of Q2 2033", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/byteblaze/empathy-prompts/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[data-qa-selector=\"title_content\"]').outerText", + "required_contents": { + "exact_match": "Integrating LLMs for better prompts" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": { + "must_include": [ + "Apr 1, 2033" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": { + "must_include": [ + "Roshan Jossey" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 660, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "dotfiles", + "issue": "add support for oh-my-zsh", + "account": "Abishek", + "due": "July 18 2033" + }, + "intent": "Create an issue in dotfiles repo with title \"add support for oh-my-zsh\". Assign the issue to Abishek. 
Set due date to be July 18 2033", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/byteblaze/dotfiles/-/tree/main", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[data-qa-selector=\"title_content\"]').outerText", + "required_contents": { + "exact_match": "add support for oh-my-zsh" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": { + "must_include": [ + "Jul 18, 2033" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": { + "must_include": [ + "Abishek S" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 661, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "ChatGPT", + "issue": "report the issue of connection refused" + }, + "intent": "Open an issue to report the issue of connection refused in ChatGPT.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/convexegg/chatgpt/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "connection refused" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 662, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in 
{{repo}}.", + "instantiation_dict": { + "repo": "aem-hacker", + "issue": "report experiencing \"OSError: [Errno 98] Address already in use\" during executions" + }, + "intent": "Open an issue to report experiencing \"OSError: [Errno 98] Address already in use\" during executions in aem-hacker.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/0ang3el/aem-hacker/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "OSError: [Errno 98] Address already in use" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 663, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "metaseq", + "issue": "ask their plan on supporting Llama and other llama family models" + }, + "intent": "Open an issue to ask their plan on supporting Llama and other llama family models in metaseq.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/root/metaseq/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "llama" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 664, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": 
{ + "repo": "awesome-python", + "issue": "ask their plans on adding Python 3.11 related resources" + }, + "intent": "Open an issue to ask their plans on adding Python 3.11 related resources in awesome-python.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/vinta/awesome-python/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "Python 3.11" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 665, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "a11y-syntax-highlighting", + "issue": "request adding support for MT theme editor" + }, + "intent": "Open an issue to request adding support for MT theme editor in a11y-syntax-highlighting.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/byteblaze/a11y-syntax-highlighting/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "MT theme editor" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 666, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/primer/design", + "geolocation": null, + "intent_template": "Submit a request to merge {{source_branch}} branch into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { 
+ "source_branch": "dialog-component", + "target_branch": "dialog", + "reviewer": "Carol" + }, + "intent": "Submit a request to merge dialog-component branch into dialog branch, assign Carol as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/primer/design/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": { + "exact_match": "dialog" + } + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": { + "exact_match": "dialog-component" + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": { + "must_include": [ + "Caroline Stewart" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 667, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/primer/design", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "dialog-component", + "target_branch": "bump-doctocat", + "reviewer": "primer" + }, + "intent": "Submit a merge request for dialog-component branch to be merged into bump-doctocat branch, assign primer as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/primer/design/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > 
a.gl-font-monospace\")[1].outerText", + "required_contents": { + "exact_match": "bump-doctocat" + } + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": { + "exact_match": "dialog-component" + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": { + "must_include": [ + "Primer" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 668, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "a11yproject.com/redesign", + "target_branch": "master", + "reviewer": "Justin Armstrong" + }, + "intent": "Submit a merge request for a11yproject.com/redesign branch to be merged into master branch, assign Justin Armstrong as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": { + "exact_match": "redesign" + } + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": { + "exact_match": "main" + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": { + "must_include": [ + "Justin Armstrong" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + 
"intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 669, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/byteblaze/solarized-prism-theme", + "geolocation": null, + "intent_template": "Open a new issue to discuss the implementation of {{feature}}", + "instantiation_dict": { + "feature": "dark mode" + }, + "intent": "Open a new issue to discuss the implementation of dark mode", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/byteblaze/solarized-prism-theme/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "implementation", + "dark mode" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 337 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 670, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/byteblaze/dotfiles", + "geolocation": null, + "intent_template": "Open a new issue to discuss the implementation of {{feature}}", + "instantiation_dict": { + "feature": "default plugins for .zsh" + }, + "intent": "Open a new issue to discuss the implementation of default plugins for .zsh", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/byteblaze/dotfiles/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "implementation", + "default plugins", + "zsh" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 337 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 671, + "require_login": true, + 
"storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "Sony Computer Entertainment VR", + "rating": "2 stars and less" + }, + "intent": "Gather the titles of Sony Computer Entertainment VR reviews with 2 stars and less rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Sony Computer Entertainment VR\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__title').outerText", + "required_contents": { + "exact_match": "real user feedback on Sony Computer Entertainment VR" + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "must_include": [ + "didn't last a year without issues", + "Disappointing. Didn't last long before it stopped powering on and needed to be sent in for repair.", + "Received used items!!" 
+ ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 672, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "Nintendo Switch Fortnite Wildcat Console EU", + "rating": "3 stars and less" + }, + "intent": "Gather the titles of Nintendo Switch Fortnite Wildcat Console EU reviews with 3 stars and less rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Nintendo Switch Fortnite Wildcat Console EU\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__title').outerText", + "required_contents": { + "exact_match": "real user feedback on Nintendo Switch Fortnite Wildcat Console EU" + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "must_include": [ + "EU charger and wild cat card doesn\u2019t even work!", + "REFUND REJECTED", + "Charging port not compatible", + "not compatible in the US", + "Wildcard Bonus Credits Not Redeemable!", + "Code not available!!" 
+ ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 673, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "Racing Wheel Overdrive for Xbox X", + "rating": "1 star" + }, + "intent": "Gather the titles of Racing Wheel Overdrive for Xbox X reviews with 1 star rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Racing Wheel Overdrive for Xbox X\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__title').outerText", + "required_contents": { + "exact_match": "real user feedback on Racing Wheel Overdrive for Xbox X" + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "must_include": [ + "Unable to set neutral steering", + "Doesn\u2019t work with PC", + "Crazy problems in automatic mode", + "pedals stopped working", + "Only works with certain games" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 674, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the 
games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "Doc and Pies Arcade Factory Cocktail Arcade Machine", + "rating": "3 stars and less" + }, + "intent": "Gather the titles of Doc and Pies Arcade Factory Cocktail Arcade Machine reviews with 3 stars and less rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Doc and Pies Arcade Factory Cocktail Arcade Machine\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__title').outerText", + "required_contents": { + "exact_match": "real user feedback on Doc and Pies Arcade Factory Cocktail Arcade Machine" + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "must_include": [ + "Poorly Made Exterior. 
Consider a different Company.", + "piece of junk ,..can't believe I spent money on this !!!!", + "Based arrived broken but game itself works" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 675, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "HORI 3D Surround Gaming Neckset", + "rating": "2 stars and less" + }, + "intent": "Gather the titles of HORI 3D Surround Gaming Neckset reviews with 2 stars and less rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on HORI 3D Surround Gaming Neckset\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__title').outerText", + "required_contents": { + "exact_match": "real user feedback on HORI 3D Surround Gaming Neckset" + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "must_include": [ + "Not worth it for PC users", + "I really wanted to like this.", + "I wish this was better..." 
+ ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 676, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "suspected of being fraudulent" + }, + "intent": "Lookup orders that are suspected of being fraudulent", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": { + "must_include": [ + "Suspected Fraud" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 677, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "processing" + }, + "intent": "Lookup orders that are processing", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": { + "must_include": [ + "Processing" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 678, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + 
"geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "canceled" + }, + "intent": "Lookup orders that are canceled", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": { + "must_include": [ + "Canceled" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 679, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "completed" + }, + "intent": "Lookup orders that are completed", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": { + "must_include": [ + "Completed" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 680, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "on hold" + }, + "intent": "Lookup orders that are on hold", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": 
"http://localhost:28083/admin/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": { + "must_include": [ + "On Hold" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 681, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Find a GitLab repository related to {{topic}} and make a Reddit post linking to it in a relevant subreddit", + "instantiation_dict": { + "topic": "gan implementation" + }, + "intent": "Find a GitLab repository related to gan implementation and make a Reddit post linking to it in a relevant subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/machinelearning |OR| http://localhost:28080/f/deeplearning |OR| http://localhost:28080/f/technology", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "gan implementation", + "http://localhost:28084/eriklindernoren/PyTorch-GAN" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 116 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 682, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Find a GitLab repository related to {{topic}} and make a Reddit post linking to it in a relevant subreddit", + "instantiation_dict": { + "topic": "chatGPT" + }, + "intent": "Find a GitLab repository related to chatGPT and make a Reddit post linking to it in a relevant subreddit", + "require_reset": false, + "eval": { + 
"eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/machinelearning |OR| http://localhost:28080/f/singularity |OR| http://localhost:28080/f/deeplearning |OR| http://localhost:28080/f/technology", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "chatgpt", + "http://localhost:28084/convexegg/chatgpt" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 116 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 683, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Find a GitLab repository related to {{topic}} and make a Reddit post linking to it in a relevant subreddit", + "instantiation_dict": { + "topic": "metaseq" + }, + "intent": "Find a GitLab repository related to metaseq and make a Reddit post linking to it in a relevant subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/machinelearning |OR| http://localhost:28080/f/deeplearning", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "metaseq", + "http://localhost:28084/root/metaseq" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 116 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 684, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "byteblaze/cloud-to-butt", + "subreddit": 
"LifeProTips" + }, + "intent": "Promote byteblaze/cloud-to-butt to subreddit LifeProTips with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/LifeProTips", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText + [...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "Chrome extension that replaces occurrences of 'the cloud' with 'my butt'", + "http://localhost:28084/byteblaze/cloud-to-butt" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 685, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "byteblaze/dotfiles", + "subreddit": "aww" + }, + "intent": "Promote byteblaze/dotfiles to subreddit aww with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/aww", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText + [...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "Computer setup", + "http://localhost:28084/byteblaze/dotfiles" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + 
"sites": [ + "reddit", + "gitlab" + ], + "task_id": 686, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "auth0/angular-storage", + "subreddit": "technology" + }, + "intent": "Promote auth0/angular-storage to subreddit technology with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/technology", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText + [...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "A storage library for AngularJS done right", + "http://localhost:28084/auth0/angular-storage" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 687, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "koush/AndroidAsync", + "subreddit": "funny" + }, + "intent": "Promote koush/AndroidAsync to subreddit funny with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/funny", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText + 
[...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "Asynchronous socket, http(s) (client+server) and websocket library for android. Based on nio, not threads.", + "http://localhost:28084/koush/AndroidAsync" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 688, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "lahwaacz/arch-wiki-docs", + "subreddit": "science" + }, + "intent": "Promote lahwaacz/arch-wiki-docs to subreddit science with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/science", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText + [...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "A script to download pages from Arch Wiki for offline browsing", + "http://localhost:28084/lahwaacz/arch-wiki-docs" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "shopping" + ], + "task_id": 689, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": 
{ + "reason": "I am a loyal customer" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as I am a loyal customer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "coupon", + "I am a loyal customer" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping" + ], + "task_id": 690, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "they promised me a coupon last time" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as they promised me a coupon last time", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "coupon", + "promised" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping" + ], + "task_id": 691, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "I plan to make a bulk purchase" + }, + "intent": "Draft 
an email to the shop owner via their contact us function for a coupon as I plan to make a bulk purchase", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "coupon", + "bulk purchase" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping" + ], + "task_id": 692, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "I am a student" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as I am a student", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "coupon", + "student" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping" + ], + "task_id": 693, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "my refund is suppoed to be replaced by a coupon" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as my refund is 
suppoed to be replaced by a coupon", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "coupon", + "refund" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 694, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "Energy-Bulk Women Shirt", + "stock": "50", + "size": "S", + "color": "blue", + "price": "60" + }, + "intent": "Add a simple product named Energy-Bulk Women Shirt with 50 in stock, available in size S and color blue, priced at $60", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/catalog/product", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "60.00" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').value", + "required_contents": { + "must_include": [ + "Energy-Bulk Women Shirt" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "50" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": { + "must_include": [ + 
"top" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[size]\"').value", + "required_contents": { + "exact_match": "167" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": { + "exact_match": "50" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": { + "must_include": [ + "tops" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 695, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "Energy-Bulk Man Yoga Pant", + "stock": "50", + "size": "38", + "color": "yellow", + "price": "69.99" + }, + "intent": "Add a simple product named Energy-Bulk Man Yoga Pant with 50 in stock, available in size 38 and color yellow, priced at $69.99", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/catalog/product", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "69.99" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').value", + "required_contents": { + "must_include": [ + "Energy-Bulk Man Yoga Pant" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "50" + } + }, + { + "url": "last", + "locator": 
"document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": { + "must_include": [ + "bottom" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[size]\"').value", + "required_contents": { + "exact_match": "179" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": { + "exact_match": "60" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": { + "must_include": [ + "bottoms" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 696, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "FancyBoy Man Causal Jeans", + "stock": "42", + "size": "34", + "color": "Blue", + "price": "169.99" + }, + "intent": "Add a simple product named FancyBoy Man Causal Jeans with 42 in stock, available in size 34 and color Blue, priced at $169.99", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/catalog/product", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').value", + "required_contents": { + "must_include": [ + "FancyBoy Man Causal Jeans" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "42" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + 
"required_contents": { + "exact_match": "169.99" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": { + "must_include": [ + "bottom" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[size]\"').value", + "required_contents": { + "exact_match": "177" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": { + "exact_match": "50" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": { + "must_include": [ + "bottoms" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 697, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "Swaatch Smart Watch", + "stock": "42", + "size": "uni-size", + "color": "Blue", + "price": "769.99" + }, + "intent": "Add a simple product named Swaatch Smart Watch with 42 in stock, available in size uni-size and color Blue, priced at $769.99", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/catalog/product", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').value", + "required_contents": { + "must_include": [ + "Swaatch Smart Watch" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "42" + } + }, + { + "url": 
"last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "769.99" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": { + "must_include": [ + "gear" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": { + "exact_match": "50" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": { + "must_include": [ + "watches" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 698, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "Lelelumon Yoga Mat", + "stock": "42", + "size": "uni-size", + "color": "black", + "price": "769.99" + }, + "intent": "Add a simple product named Lelelumon Yoga Mat with 42 in stock, available in size uni-size and color black, priced at $769.99", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/catalog/product", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').value", + "required_contents": { + "must_include": [ + "Lelelumon Yoga Mat" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "42" + } + }, + { + "url": "last", + "locator": 
"document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "769.99" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": { + "must_include": [ + "gear" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": { + "exact_match": "49" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": { + "must_include": [ + "fitness equipment" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 699, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "spring sale", + "rule": "a 20 percent discount site-wide" + }, + "intent": "Draft a new marketing price rule for spring sale that offers a 20 percent discount site-wide for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/sales_rule/promo_quote", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": { + "must_include": [ + "spring sale" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": { + "exact_match": "0" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": 
"document.querySelector('[name=\"simple_action\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "by_percent" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "20" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 700, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "fall discount", + "rule": "$10 discount on checkout" + }, + "intent": "Draft a new marketing price rule for fall discount that offers $10 discount on checkout for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/sales_rule/promo_quote", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": { + "must_include": [ + "fall discount" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": { + "exact_match": "0" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "prep_actions": 
["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "cart_fixed" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "10" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 701, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "Mother's day sale", + "rule": "$15 discount on checkout" + }, + "intent": "Draft a new marketing price rule for Mother's day sale that offers $15 discount on checkout for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/sales_rule/promo_quote", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": { + "must_include": [ + "Mother's day sale" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": { + "exact_match": "0" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": 
{ + "exact_match": "cart_fixed" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "15" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 702, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "Pride Month", + "rule": "45% off on all products" + }, + "intent": "Draft a new marketing price rule for Pride Month that offers 45% off on all products for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/sales_rule/promo_quote", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": { + "must_include": [ + "Pride Month" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": { + "exact_match": "0" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "by_percent" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + 
"prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "45" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 703, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "Thanks giving sale", + "rule": "$40 discount on checkout" + }, + "intent": "Draft a new marketing price rule for Thanks giving sale that offers $40 discount on checkout for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/sales_rule/promo_quote", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": { + "must_include": [ + "Thanks giving sale" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": { + "exact_match": "0" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "cart_fixed" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + 
"required_contents": { + "exact_match": "40" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 704, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "sales order report", + "time_span": "for last month" + }, + "intent": "Today is 3/15/2023, generate a sales order report for last month", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/reports/report_sales/sales", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "2/1/23" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "2/28/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 705, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "sales order report", + "time_span": "over the last 45 days" + }, + "intent": "Today is 3/15/2023, generate a sales order report over the last 45 days", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/reports/report_sales/sales", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + 
"required_contents": { + "exact_match": "1/29/23" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "3/15/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 706, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "refund report", + "time_span": "for Q1" + }, + "intent": "Today is 3/15/2023, generate a refund report for Q1", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/reports/report_sales/refunded", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "1/1/23" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "3/31/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 707, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "sales order report", + "time_span": "for last year" + }, + "intent": "Today is 3/15/2023, generate a sales order report for last year", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/reports/report_sales/sales", + 
"program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "1/1/2022" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "12/31/2022" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 708, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "tax report", + "time_span": "for this year" + }, + "intent": "Today is 3/15/2023, generate a tax report for this year", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/reports/report_sales/tax/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "1/1/2023" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "12/31/2023" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 709, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Create an {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "orders", + "start_date": "beginning of May 2021", + "end_date": "end of March 2022" + }, + "intent": "Create an orders report from beginning of May 2021 to end of March 2022", + "require_reset": false, + 
"eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/reports/report_sales/sales", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "5/1/2021" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "3/31/2022" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 710, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "shipping", + "start_date": "08/05/2022", + "end_date": "03/01/2023" + }, + "intent": "Create a shipping report from 08/05/2022 to 03/01/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/reports/report_sales/shipping", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "8/5/22" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "3/1/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 711, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "product view", 
+ "start_date": "07/05/2021", + "end_date": "05/31/2023" + }, + "intent": "Create a product view report from 07/05/2021 to 05/31/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/reports/report_product/viewed/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "7/5/21" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "5/31/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 712, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "coupons", + "start_date": "05/01/2021", + "end_date": "05/15/2023" + }, + "intent": "Create a coupons report from 05/01/2021 to 05/15/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/reports/report_sales/coupons/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "5/1/21" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "5/15/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 713, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": 
"http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "best sellers", + "start_date": "05/01/2022", + "end_date": "05/31/2023" + }, + "intent": "Create a best sellers report from 05/01/2022 to 05/31/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/reports/report_sales/bestsellers/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "5/1/22" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "5/31/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "reddit" + ], + "task_id": 714, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "subreddit": "gadgets", + "k": "1" + }, + "intent": "Thumbs down the top 1 post ever in gadgets.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/gadgets/19459/a-custom-gaming-pc-built-inside-a-vintage-1940s-motorola", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 715, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + 
"geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "subreddit": "history", + "k": "2" + }, + "intent": "Thumbs down the top 2 post ever in history.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/history/84338/the-scientist-who-discovered-sperm-was-so-grossed-out-he", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/history/105990/4-500-year-old-sumerian-temple-dedicated-to-mighty-thunder", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 716, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "k": "3", + "subreddit": "books" + }, + "intent": "Thumbs down the top 3 post ever in books.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/books/81371/the-letters-of-t-s-eliot-to-emily-hale-that-were-kept-sealed", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/books/59421/friendly-reminder-bookshop-org-exists", + "locator": 
"document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/books/59447/appalachian-prison-book-project-seeks-notebook-donations-the", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 717, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "k": "4", + "subreddit": "movies" + }, + "intent": "Thumbs down the top 4 post ever in movies.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/movies/86174/who-will-win-the-oscar-for-actress-in-a-supporting-role", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/movies/86029/who-will-win-the-oscar-for-film-editing", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/movies/86055/cindy-williams-dies-laverne-amp-shirley-star-who-appeared-in", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": 
"http://localhost:28080/f/movies/42682/michelle-yeoh-to-receive-palm-springs-film-festival-s", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 718, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "k": "5", + "subreddit": "technology" + }, + "intent": "Thumbs down the top 5 post ever in technology.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/technology/48670/brain-cancer-vaccine-succeeds-at-prolonging-survival-in", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/technology/134696/india-cuts-internet-for-27-million-people-amid-search-for", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/technology/48785/us-judge-orders-amazon-to-cease-and-desist-anti-union", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/technology/70354/activision-s-boston-studio-workers-announce-unionization", + "locator": 
"document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/technology/70233/social-media-influencers-are-charged-with-feeding-followers", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 719, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "subreddit": "wallstreetbets", + "user": "ThetaGang_wsb" + }, + "intent": "Like all submissions created by ThetaGang_wsb in subreddit wallstreetbets", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/wallstreetbets/29478/how-will-airbnb-close-following-their-earnings-report-on", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/wallstreetbets/29458/how-much-will-the-federal-reserve-raise-interest-rates-in", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 720, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + 
"intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "subreddit": "earthporn", + "user": "CameronKelsey" + }, + "intent": "Like all submissions created by CameronKelsey in subreddit earthporn", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/EarthPorn/98332/my-favorite-place-on-the-planet-henry-s-fork-of-the-snake", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/EarthPorn/98297/2-years-later-this-is-still-one-of-the-most-incredible", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/EarthPorn/98256/i-can-t-wait-for-all-this-green-to-start-coming-back-little", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 721, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "UniversityofBath", + "subreddit": "IAmA" + }, + "intent": "Like all submissions created by UniversityofBath in subreddit IAmA", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"http://localhost:28080/f/IAmA/119742/hi-i-m-vienne-a-doctoral-student-at-the-university-of-bath-i", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/IAmA/119719/hello-reddit-i-m-nazia-mehrban-a-lecturer-in-biotechnology", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/IAmA/119714/i-m-ellie-jarvis-she-her-a-2nd-year-phd-student-in-the", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/IAmA/55155/hi-i-m-dr-lucy-maddox-from-bath-university-uk-i-m-a-clinical", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/IAmA/55142/we-re-sadeka-nujhat-hannah-leese-and-sandhya-moise-from-the", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/IAmA/34032/we-re-sandhya-moise-david-phillips-and-chan-lee-from-the", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/IAmA/13175/hi-i-m-kit-yates-i-m-a-mathematical-biologist-at-the", + "locator": 
"document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/IAmA/13170/hello-i-m-dr-sara-fontani-from-the-university-of", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 722, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "Don_Gato1", + "subreddit": "new york" + }, + "intent": "Like all submissions created by Don_Gato1 in subreddit new york", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/nyc/44650/fox-news-hosts-cast-new-york-as-crime-ridden-and-chaotic", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 723, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "FTorrez81", + "subreddit": "iphone13" + }, + "intent": "Like all submissions created by FTorrez81 in subreddit iphone13", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + 
"fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "reference_answer_raw_annotation": "N/A", + "string_note": "FTorrez81 does not have any submissions in iphone13" + }, + "intent_template_id": 25, + "string_note": "FTorrez81 has no submissions in subreddit iphone13" + }, + { + "sites": [ + "reddit" + ], + "task_id": 724, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "Hrekires", + "subreddit": "news" + }, + "intent": "Like all submissions created by Hrekires in subreddit news", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/news/129816/gov-whitmer-signs-bills-to-repeal-right-to-work-restore", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/129808/disney-world-deal-with-union-will-raise-minimum-wage-to-18", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/129794/judge-halts-wyoming-abortion-ban-days-after-it-took-effect", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/129783/don-t-say-gay-lawmaker-pleads-guilty-to-covid-relief-fraud", + "locator": 
"document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/129594/arizona-gov-katie-hobbs-refuses-to-proceed-with-execution", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/129508/tennessee-governor-oks-bill-to-cut-nashville-council-in-half", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/43839/philadelphia-da-larry-krasner-impeached-by-pa-house", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/43781/crypto-giant-ftx-to-file-for-bankruptcy-ceo-sam-bankman", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/43572/sec-doj-investigating-crypto-platform-ftx", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/43558/kansas-gov-laura-kelly-wins-re-election-defeating-gop", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + 
"intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 725, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "subreddit": "massachusetts", + "user": "RickyDontLoseThat" + }, + "intent": "DisLike all submissions created by RickyDontLoseThat in subreddit massachusetts", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/massachusetts/84954/the-last-of-lincoln", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 726, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "subreddit": "earthporn", + "user": "jacyanthis" + }, + "intent": "DisLike all submissions created by jacyanthis in subreddit earthporn", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "jacyanthis does not have any submissions in earthporn", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 727, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "DisLike all submissions created by 
{{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "PatientBuilder499", + "subreddit": "videos" + }, + "intent": "DisLike all submissions created by PatientBuilder499 in subreddit videos", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/videos/115139/hundreds-of-civilian-turkish-volunteers-waiting-to-be-sent", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 728, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "sirbarani", + "subreddit": "sports" + }, + "intent": "DisLike all submissions created by sirbarani in subreddit sports", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/sports/48303/iran-football-legend-daei-will-not-attend-world-cup-amid", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 729, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "AdamCannon", + "subreddit": 
"UpliftingNews" + }, + "intent": "DisLike all submissions created by AdamCannon in subreddit UpliftingNews", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/UpliftingNews/16087/same-sex-marriage-is-now-legal-in-all-of-mexico-s-states", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 730, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "Hrekires", + "subreddit": "news" + }, + "intent": "DisLike all submissions created by Hrekires in subreddit news", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/news/129816/gov-whitmer-signs-bills-to-repeal-right-to-work-restore", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/129808/disney-world-deal-with-union-will-raise-minimum-wage-to-18", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/129794/judge-halts-wyoming-abortion-ban-days-after-it-took-effect", + "locator": 
"document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/129783/don-t-say-gay-lawmaker-pleads-guilty-to-covid-relief-fraud", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/129594/arizona-gov-katie-hobbs-refuses-to-proceed-with-execution", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/129508/tennessee-governor-oks-bill-to-cut-nashville-council-in-half", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/43839/philadelphia-da-larry-krasner-impeached-by-pa-house", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/43781/crypto-giant-ftx-to-file-for-bankruptcy-ceo-sam-bankman", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/news/43572/sec-doj-investigating-crypto-platform-ftx", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { 
+ "url": "http://localhost:28080/f/news/43558/kansas-gov-laura-kelly-wins-re-election-defeating-gop", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 731, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "Nvidia RTX 4090", + "content": "EDIT: This news aged well" + }, + "intent": "Edit my post on Nvidia RTX 4090 by adding a line to the body that says \"EDIT: This news aged well\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/MachineLearning/1/nvidia-rtx-4090", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "must_include": [ + "EDIT: This news aged well", + "Crazy device for ML!" + ] + } + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "reddit" + ], + "task_id": 732, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "The Night Agent", + "content": "Done watching, pretty cool!" 
+ }, + "intent": "Edit my post on The Night Agent by adding a line to the body that says \"Done watching, pretty cool!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/television/134868/the-night-agent-renewed-for-season-2-at-netflix", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "exact_match": "Done watching, pretty cool!" + } + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "reddit" + ], + "task_id": 733, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "Star Trek Starfleet Academy series", + "content": "Every watch makes me feel like a kid again" + }, + "intent": "Edit my post on Star Trek Starfleet Academy series by adding a line to the body that says \"Every watch makes me feel like a kid again\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/television/135201/star-trek-starfleet-academy-series-from-alex-kurtzman-and", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "exact_match": "Every watch makes me feel like a kid again" + } + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "reddit" + ], + "task_id": 734, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "Ted Lasso", + "content": "Done watching. 
I love the renew!" + }, + "intent": "Edit my post on Ted Lasso by adding a line to the body that says \"Done watching. I love the renew!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/television/135156/ted-lasso-season-3-premiere-scores-870k-u-s-households-up-59", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "exact_match": "Done watching. I love the renew!" + } + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "reddit" + ], + "task_id": 735, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "Lord of the Rings", + "content": "The cast is amazing!" + }, + "intent": "Edit my post on Lord of the Rings by adding a line to the body that says \"The cast is amazing!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/television/135152/lord-of-the-rings-the-rings-of-power-season-2-cast-adds", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "exact_match": "The cast is amazing!" 
+ } + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 736, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to MIT license.", + "instantiation_dict": { + "repo": "gimmiethat.space and dotfiles" + }, + "intent": "Make the LICENSE of gimmiethat.space and dotfiles to MIT license.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/gimmiethat.space/-/blob/main/LICENSE", + "locator": "", + "required_contents": { + "must_include": [ + "MIT license", + "The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software." + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/dotfiles/-/blob/main/LICENSE", + "locator": "", + "required_contents": { + "must_include": [ + "MIT license", + "The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software." 
+ ] + } + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 737, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "Philadelphia 76ers", + "time": "" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of Philadelphia 76ers ", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Wells Fargo Center", + "South Philadelphia Sports Complex" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 738, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "Philadelphia 76ers", + "time": "in the 70th" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of Philadelphia 76ers in the 70th", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + 
"reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "3601 South Broad Street", + "South Philadelphia" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 739, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "Yankees", + "time": "in the 80th" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of Yankees in the 80th", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Yankee Stadium", + "East 161st Street" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 740, + "require_login": true, + 
"storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "NYC NBA team", + "time": "" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of NYC NBA team ", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Madison Square Garden", + "Pennsylvania Plaza", + "Manhattan", + "New York" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 741, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "Boston home NBA team", + "time": "" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of Boston home NBA team ", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + 
"must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "TD Garden", + "Boston", + "Massachusetts" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 742, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"planner\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "private", + "account_list": "Abishek, Vinta" + }, + "intent": "Create a new private project \"planner\" and add Abishek, Vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/planner", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "private" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/planner/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@abisubramanya27", + "@vinta" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 743, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"web_arena\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "public", + "account_list": "Abishek, Vinta" + }, + "intent": "Create a new public project \"web_arena\" and add Abishek, Vinta as members", 
+ "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/web_arena", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "public" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/web_arena/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@abisubramanya27", + "@vinta" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 744, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"AutoAGI\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "public", + "account_list": "primer" + }, + "intent": "Create a new public project \"AutoAGI\" and add primer as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/AutoAGI", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "public" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/AutoAGI/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@primer" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 745, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"awesome-llms\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "public", + "account_list": "primer, convexegg, abishek" + }, + 
"intent": "Create a new public project \"awesome-llms\" and add primer, convexegg, abishek as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/awesome-llms", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "public" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/awesome-llms/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@primer", + "@convexegg", + "@abisubramanya27" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 746, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"llm_bulk_inference\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "private", + "account_list": "primer, convexegg, abishek" + }, + "intent": "Create a new private project \"llm_bulk_inference\" and add primer, convexegg, abishek as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/llm_bulk_inference", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/llm_bulk_inference/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@primer", + "@convexegg", + "@abisubramanya27" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 747, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": 
"http://localhost:28084", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "awesome_web_agents", + "template": "blank", + "account_list": "Abishek, Vinta" + }, + "intent": "Start a private project awesome_web_agents with blank template and add Abishek, Vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/awesome_web_agents", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/awesome_web_agents/-/commits", + "locator": "", + "required_contents": { + "must_include": [ + "Initial commit" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/awesome_web_agents/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@abisubramanya27", + "@vinta" + ] + } + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 748, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "web_agent_android_xl", + "template": "Android", + "account_list": "primer, convexegg, abishek" + }, + "intent": "Start a private project web_agent_android_xl with Android template and add primer, convexegg, abishek as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"http://localhost:28084/byteblaze/web_agent_android_xl", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/web_agent_android_xl/-/commits", + "locator": "", + "required_contents": { + "must_include": [ + "Initialized from 'Android' project template" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/web_agent_android_xl/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@primer", + "@convexegg", + "@abisubramanya27" + ] + } + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 749, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "project_site", + "template": "NodeJS", + "account_list": "primer, convexegg, vinta" + }, + "intent": "Start a private project project_site with NodeJS template and add primer, convexegg, vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/project_site", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/project_site/-/commits", + "locator": "", + "required_contents": { + "must_include": [ + "Initialized from 'NodeJS Express' project template" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/project_site/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@primer", + "@convexegg", + "@vinta" + ] + } + } + ] + 
}, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 750, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "agi_index", + "template": "HTML", + "account_list": "Vinta Chen" + }, + "intent": "Start a private project agi_index with HTML template and add Vinta Chen as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/agi_index", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/agi_index", + "locator": "document.querySelector('.home-panel-description-markdown').outerText", + "required_contents": { + "must_include": [ + "Example plain HTML site using GitLab Pages: https://pages.gitlab.io/plain-html |OR| A plain HTML site that uses Netlify for CI/CD instead of GitLab, but still with all the other great GitLab features." 
+ ] + } + }, + { + "url": "http://localhost:28084/byteblaze/agi_index/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "Vinta Chen" + ] + } + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 751, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "AGISite", + "template": "JEKYLL", + "account_list": "Rohan and Vinta" + }, + "intent": "Start a private project AGISite with JEKYLL template and add Rohan and Vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/AGISite", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/AGISite", + "locator": "document.querySelector('.home-panel-description-markdown').outerText", + "required_contents": { + "must_include": [ + "Example Jekyll site using GitLab Pages: https://pages.gitlab.io/jekyll |OR| A Jekyll site that uses Netlify for CI/CD instead of GitLab, but still with all the other great GitLab features." 
+ ] + } + }, + { + "url": "http://localhost:28084/byteblaze/AGISite/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@Seirdy", + "@vinta" + ] + } + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 752, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "web_agent", + "template": "blank" + }, + "intent": "Create a private blank repository called \"web_agent\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/web_agent", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/web_agent/-/commits", + "locator": "", + "required_contents": { + "must_include": [ + "Initial commit" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 753, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "web_agent_android_xs", + "template": "Android" + }, + "intent": "Create a private Android repository called \"web_agent_android_xs\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + 
"reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/web_agent_android_xs", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/web_agent_android_xs/-/commits", + "locator": "", + "required_contents": { + "must_include": [ + "Initialized from 'Android' project template" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 754, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "web_agent_nodejs", + "template": "NodeJS" + }, + "intent": "Create a private NodeJS repository called \"web_agent_nodejs\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/web_agent_nodejs", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/web_agent_nodejs/-/commits", + "locator": "", + "required_contents": { + "must_include": [ + "Initialized from 'NodeJS Express' project template" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 755, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up 
development.", + "instantiation_dict": { + "project_name": "web_agent_index", + "template": "HTML" + }, + "intent": "Create a private HTML repository called \"web_agent_index\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/web_agent_index", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/web_agent_index", + "locator": "document.querySelector('.home-panel-description-markdown').outerText", + "required_contents": { + "must_include": [ + "Example plain HTML site using GitLab Pages: https://pages.gitlab.io/plain-html |OR| A plain HTML site that uses Netlify for CI/CD instead of GitLab, but still with all the other great GitLab features." + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 756, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "11711_gitlab", + "template": "JEKYLL" + }, + "intent": "Create a private JEKYLL repository called \"11711_gitlab\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/11711_gitlab", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": 
"http://localhost:28084/byteblaze/11711_gitlab", + "locator": "document.querySelector('.home-panel-description-markdown').outerText", + "required_contents": { + "must_include": [ + "Example Jekyll site using GitLab Pages: https://pages.gitlab.io/jekyll |OR| A Jekyll site that uses Netlify for CI/CD instead of GitLab, but still with all the other great GitLab features." + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "map" + ], + "task_id": 757, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the path and travel time from {{city1}} to {{city2}}.", + "instantiation_dict": { + "city1": "home of the 1980 Super Bowl champions", + "city2": "home of the 1991 Super Bowl champions" + }, + "intent": "Show me the path and travel time from home of the 1980 Super Bowl champions to home of the 1991 Super Bowl champions.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "New York" + ] + } + } + ] + }, + "intent_template_id": 42 + }, + { + "sites": [ + "map" + ], + "task_id": 758, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the path and travel time from {{city1}} to {{city2}}.", + "instantiation_dict": { + "city1": "the big apple", + "city2": "biggest city in Maine" + }, + "intent": "Show me 
the path and travel time from the big apple to biggest city in Maine.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "New York" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Portland", + "Maine" + ] + } + } + ] + }, + "intent_template_id": 42 + }, + { + "sites": [ + "map", + "shopping_admin" + ], + "task_id": 759, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the route and driving time from {{city1}} to {{city2}}", + "instantiation_dict": { + "city1": "the city where my E-commerce customer Sophia Young lives", + "city2": "New York City" + }, + "intent": "Show me the route and driving time from the city where my E-commerce customer Sophia Young lives to New York City", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Boston" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "New York" + ] + } + } + ] + }, + "intent_template_id": 42 + }, + { + "sites": [ + "map", 
+ "shopping_admin" + ], + "task_id": 760, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the route and driving time from {{city1}} to {{city2}}", + "instantiation_dict": { + "city1": "Allentown, PA", + "city2": "the city where my E-commerce customer Amanda Kim lives" + }, + "intent": "Show me the route and driving time from Allentown, PA to the city where my E-commerce customer Amanda Kim lives", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Allentown" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Hoboken", + "New Jersey" + ] + } + } + ] + }, + "intent_template_id": 42 + }, + { + "sites": [ + "map" + ], + "task_id": 761, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Get directions from {{location/address_1}} to {{location/address_2}} using {{transportation}} options.", + "instantiation_dict": { + "location/address_1": "Carnegie Science Museum", + "location/address_2": "Hunt library CMU", + "transportation": "walk" + }, + "intent": "Get directions from Carnegie Science Museum to Hunt library CMU using walk options.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content 
select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "2" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Science Center", + "Allegheny County", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Hunt Library", + "Pittsburgh" + ] + } + } + ] + }, + "intent_template_id": 54 + }, + { + "sites": [ + "map" + ], + "task_id": 762, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Get directions from {{location/address_1}} to {{location/address_2}} using {{transportation}} options.", + "instantiation_dict": { + "location/address_1": "Carnegie Music Hall in NYC", + "location/address_2": "Carnegie Mellon University", + "transportation": "driving" + }, + "intent": "Get directions from Carnegie Music Hall in NYC to Carnegie Mellon University using driving options.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Hall", + "West 57th Street", + "Manhattan", + "New York" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + } + ] + }, + "intent_template_id": 54 + }, + { + "sites": [ + "map" + ], + "task_id": 763, + "require_login": true, + "storage_state": null, + "start_url": 
"https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "Trader Joe's", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest Trader Joe's from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "2" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "401, Shady Avenue, Shadyside" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Trader Joe's, 6343, Penn Avenue, East Liberty" + ] + } + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "map" + ], + "task_id": 764, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "Target", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest Target from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "2" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "401, Shady Avenue, Shadyside" + ] + } + 
}, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Target, 6231, Penn Avenue, East Liberty" + ] + } + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "map" + ], + "task_id": 765, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "Japanese food market", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest Japanese food market from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "2" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "401, Shady Avenue, Shadyside" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Tokyo Japanese Food Store, 5855, Ellsworth Avenue, Shadyside" + ] + } + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "map" + ], + "task_id": 766, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "grocessory owned by Amazon", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest grocessory owned by Amazon from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + 
"reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "2" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "401, Shady Avenue, Shadyside" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Whole Foods Market, 5700, Penn Avenue, East Liberty" + ] + } + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "map" + ], + "task_id": 767, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "chain grocessory owned by a local business", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest chain grocessory owned by a local business from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "2" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "401, Shady Avenue, Shadyside" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Giant Eagle, 5550, Centre Avenue, Shadyside" + ] + } + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 768, + "require_login": true, + 
"storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "{{quantity}} {{product}} arrived, update the stock", + "instantiation_dict": { + "quantity": "5", + "product": "blue Cronus yoga pants with size 33" + }, + "intent": "5 blue Cronus yoga pants with size 33 arrived, update the stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/872/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "5" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/872/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 241 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 769, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "We've received {{quantity}} {{product}}, please update the inventory.", + "instantiation_dict": { + "quantity": "378", + "product": "brown Aero daily fitness tee in every size" + }, + "intent": "We've received 378 brown Aero daily fitness tee in every size, please update the inventory.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/544/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "478" + } + }, + { + "url": 
"http://localhost:28083/admin/catalog/product/edit/id/547/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "478" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/550/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "478" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/553/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "478" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/556/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "478" + } + } + ] + }, + "intent_template_id": 241 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 770, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "We've received {{quantity}}, update the inventory.", + "instantiation_dict": { + "quantity": "12 white Cora parachute pant of size 28 and 56 blue of size 29" + }, + "intent": "We've received 12 white Cora parachute pant of size 28 and 56 blue of size 29, update the inventory.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1836/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "112" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1838/", + "locator": 
"document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "156" + } + } + ] + }, + "intent_template_id": 241 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 771, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Approve the positive reviews to display in our store.", + "instantiation_dict": {}, + "intent": "Approve the positive reviews to display in our store.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/review/product/edit/id/352", + "locator": "document.querySelector('[name=\"status_id\"').value", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "http://localhost:28083/admin/review/product/edit/id/349", + "locator": "document.querySelector('[name=\"status_id\"').value", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "http://localhost:28083/admin/review/product/edit/id/347", + "locator": "document.querySelector('[name=\"status_id\"').value", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 243 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 772, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "pending negative reviews for Circe fleece" + }, + "intent": "Delete all pending negative reviews for Circe fleece", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/review/product/edit/id/999", + "locator": 
"", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 773, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "pending negative reviews" + }, + "intent": "Delete all pending negative reviews", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/review/product/edit/id/351", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + }, + { + "url": "http://localhost:28083/admin/review/product/edit/id/353", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 774, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "pending reviews with less than 4 stars" + }, + "intent": "Delete all pending reviews with less than 4 stars", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/review/product/edit/id/351", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + }, + { + "url": "http://localhost:28083/admin/review/product/edit/id/353", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + }, + { + "url": 
"http://localhost:28083/admin/review/product/edit/id/349", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 775, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "reviews from the scammer Arden" + }, + "intent": "Delete all reviews from the scammer Arden", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/review/product/edit/id/51", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 776, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "reviews from the scammer Carlo" + }, + "intent": "Delete all reviews from the scammer Carlo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/review/product/edit/id/93", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + }, + { + "url": "http://localhost:28083/admin/review/product/edit/id/109", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 777, + "require_login": true, + 
"storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "$5", + "action": "Reduce", + "config": "green Hollister backyard sweater in all size" + }, + "intent": "Reduce the price of green Hollister backyard sweater in all size by $5", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/120/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "47.00" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/117/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "47.00" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/114/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "47.00" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/111/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "47.00" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/123/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "47.00" + } + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 778, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "13.5%", + "action": "Reduce", + "config": 
"size 28 Sahara leggings" + }, + "intent": "Reduce the price of size 28 Sahara leggings by 13.5%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1841/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "64.88" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1842/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "64.88" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1843/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "64.88" + } + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 779, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "15%", + "action": "Reduce", + "config": "yellow shirts from Gwyn Endurance in all size below L" + }, + "intent": "Reduce the price of yellow shirts from Gwyn Endurance in all size below L by 15%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1559/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "20.40" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1562/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "20.40" + } + }, + { + 
"url": "http://localhost:28083/admin/catalog/product/edit/id/1565/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "20.40" + } + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 780, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin/catalog/product/edit/id/1481/", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "$17", + "action": "Increase", + "config": "white Ingrid Running with size L and above" + }, + "intent": "Increase the price of white Ingrid Running with size L and above by $17", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1264/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "64.00" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1267/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "64.00" + } + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 781, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "37%", + "action": "Increase", + "config": "black fitness tshirts from Desiree with size XS" + }, + "intent": "Increase the price of black fitness tshirts from Desiree with size XS by 37%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + 
"reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1573/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "32.88" + } + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 782, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "23%", + "action": "Increase", + "config": "all blue running tshirts in extra small and small sizes" + }, + "intent": "Increase the price of all blue running tshirts in extra small and small sizes by 23%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/496/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "22.33" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/499/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "22.33" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/479/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "21.56" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/482/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "21.56" + } + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 783, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + 
"geolocation": null, + "intent_template": "Add the following users to repo {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "kkroening/ffmpeg-python", + "role": "maintainer", + "user_list": [ + "yjlou", + "a11yproject" + ] + }, + "intent": "Add the following users to repo kkroening/ffmpeg-python as maintainer: ['yjlou', 'a11yproject']", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "not the owner of the repo so cannot add maintainer", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 351 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 784, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/CellularPrivacy/Android-IMSI-Catcher-Detector", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "main", + "attribute": "email address" + }, + "intent": "Tell me the email address of the contributor who has the most commits to branch main", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "secupwn@users.noreply.github.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "secupwn@users.noreply.github.com" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 785, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/CellularPrivacy/Android-IMSI-Catcher-Detector", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "gh-page", + 
"attribute": "email address" + }, + "intent": "Tell me the email address of the contributor who has the most commits to branch gh-page", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "secupwn@users.noreply.github.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "secupwn@users.noreply.github.com" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 786, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/vinta/awesome-python", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "main", + "attribute": "number of commits" + }, + "intent": "Tell me the number of commits of the contributor who has the most commits to branch main", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "412" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "412" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 787, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/dehenne/awesome-visibility/-/graphs/master", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "main", + "attribute": "number of followers" + }, + "intent": "Tell me the number of followers of the contributor who has the most commits to branch main", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": 
"", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 788, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/amwhalen/archive-my-tweets/-/tree/github/fork/chtitux/addRssFeed", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "php52", + "attribute": "full name, gitlab account name, location and email address" + }, + "intent": "Tell me the full name, gitlab account name, location and email address of the contributor who has the most commits to branch php52", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Andrew M. Whalen", + "amwhalen", + "Massachusetts", + "github@amwhalen.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Andrew M. 
Whalen, amwhalen, Massachusetts, github@amwhalen.com" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 789, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create an issue {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "huggingface dataset", + "issue": "asking about do they have any plan on supporting Webagent in the next quater" + }, + "intent": "Create an issue asking about do they have any plan on supporting Webagent in the next quater in huggingface dataset.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "huggingface repo does not exist", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 790, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "negative reviews for Sybil running short" + }, + "intent": "Delete all negative reviews for Sybil running short", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "No negative reviews for sybil running short", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 246 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 791, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Make a folder named {{directory}} on the {{gitlab_repo}} repo and include a file called 
urls.txt that consists of the links to the 5 most recent posts from {{subreddit}}.", + "instantiation_dict": { + "directory": "car", + "subreddit": "cars", + "gitlab_repo": "gimmiethat.space" + }, + "intent": "Make a folder named car on the gimmiethat.space repo and include a file called urls.txt that consists of the links to the 5 most recent posts from cars.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "the subreddit does not exist", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 84 + }, + { + "sites": [ + "shopping" + ], + "task_id": 792, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "Beauty & Personal Care", + "dollar_value": "under 20" + }, + "intent": "Buy the highest rated product from the Beauty & Personal Care category within a budget under 20.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The website does not support sorting by rating and there are too many products in this category", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 793, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "Men clothing", + "dollar_value": "above 50 but 
under 129.99" + }, + "intent": "Buy the highest rated product from the Men clothing category within a budget above 50 but under 129.99.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The website does not support sorting by rating and there are too many products in this category", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 794, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "4000 Forbes Ave, Pittsburgh, PA" + }, + "intent": "Change the delivery address for my most recent order to 4000 Forbes Ave, Pittsburgh, PA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "shopping" + ], + "task_id": 795, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "6726 McPherson Blvd, Pittsburgh, PA" + }, + "intent": "Change the delivery address for my most recent order to 6726 McPherson Blvd, Pittsburgh, PA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + 
"string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "shopping" + ], + "task_id": 796, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "155 5th Street, San Francisco, CA" + }, + "intent": "Change the delivery address for my most recent order to 155 5th Street, San Francisco, CA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "shopping" + ], + "task_id": 797, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "3 Oxford St, Cambridge, MA" + }, + "intent": "Change the delivery address for my most recent order to 3 Oxford St, Cambridge, MA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "shopping" + ], + "task_id": 798, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + 
"geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "77 Massachusetts Ave, Cambridge, MA" + }, + "intent": "Change the delivery address for my most recent order to 77 Massachusetts Ave, Cambridge, MA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 799, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "n-lab", + "members": "patou, egpast, westurner, jontutcher" + }, + "intent": "create a new group \"n-lab\" with members patou, egpast, westurner, jontutcher", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/groups/n-lab/-/group_members", + "locator": "", + "required_contents": { + "must_include": [ + "@patou", + "@egpast", + "@westurner", + "@jontutcher" + ] + } + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 800, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "x-lab", + "members": "JonasVautherin, dilipchandima, dawiss1337, bmyun, DCMJY" + }, + "intent": "create a new group \"x-lab\" with members JonasVautherin, dilipchandima, 
dawiss1337, bmyun, DCMJY", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/groups/x-lab/-/group_members", + "locator": "", + "required_contents": { + "must_include": [ + "@JonasVautherin", + "@dilipchandima", + "@dawiss1337", + "@bmyun", + "@DCMJY" + ] + } + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 801, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "crew", + "members": "ASWATFZLLC, patrickhlauke, westurner, linkmatrix" + }, + "intent": "create a new group \"crew\" with members ASWATFZLLC, patrickhlauke, westurner, linkmatrix", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/groups/crew/-/group_members", + "locator": "", + "required_contents": { + "must_include": [ + "@ASWATFZLLC", + "@patrickhlauke", + "@westurner", + "@linkmatrix" + ] + } + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 802, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "coding_friends", + "members": "qhduan, Agnes-U" + }, + "intent": "create a new group \"coding_friends\" with members qhduan, Agnes-U", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/groups/coding_friends/-/group_members", 
+ "locator": "", + "required_contents": { + "must_include": [ + "@qhduan", + "@Agnes-U" + ] + } + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 803, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "webagent", + "members": "pandey2000, sayakpaul, sayakpaul" + }, + "intent": "create a new group \"webagent\" with members pandey2000, sayakpaul, sayakpaul", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/groups/webagent/-/group_members", + "locator": "", + "required_contents": { + "must_include": [ + "@pandey2000", + "@sayakpaul" + ] + } + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 804, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} to {{account}}.", + "instantiation_dict": { + "issue": "flash alerts", + "account": "myself and primer" + }, + "intent": "Assign the issue regarding flash alerts to myself and primer.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/dashboard/issues?scope=all&state=opened&assignee_username=byteblaze", + "locator": "", + "required_contents": { + "must_include": [ + "Add documentation on using Flash alerts in dialog components", + "Clarify usage of flash alert" + ] + } + }, + { + "url": "http://localhost:28084/dashboard/issues?scope=all&state=opened&assignee_username=primer", + "locator": "", + "required_contents": { + 
"must_include": [ + "Add documentation on using Flash alerts in dialog components", + "Clarify usage of flash alert" + ] + } + } + ] + }, + "intent_template_id": 999 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 805, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "the branch that implements the support of template strings", + "target_branch": "master", + "reviewer": "myself and Roshan" + }, + "intent": "Submit a merge request for the branch that implements the support of template strings to be merged into master branch, assign myself and Roshan as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": { + "exact_match": "feature/replace-gulp" + } + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": { + "exact_match": "main" + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": { + "must_include": [ + "Byte Blaze", + "Roshan Jossy" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 806, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Submit a merge request for 
{{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "a11yproject.com/redesign", + "target_branch": "markdown-figure-block", + "reviewer": "myself" + }, + "intent": "Submit a merge request for a11yproject.com/redesign branch to be merged into markdown-figure-block branch, assign myself as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": { + "exact_match": "redesign" + } + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": { + "exact_match": "feature/markdown-figure-block" + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": { + "must_include": [ + "Byte Blaze" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 807, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/primer/design", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "build time debug", + "target_branch": "main", + "reviewer": "myself" + }, + "intent": "Submit a merge request for build time debug branch to be merged into main branch, assign myself as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + 
"reference_url": "http://localhost:28084/primer/design/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": { + "exact_match": "main" + } + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": { + "exact_match": "debug-build-time" + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": { + "must_include": [ + "Byte Blaze" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 808, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "cloud-to-butt", + "issue": "Let's keep the project alive", + "account": "myself", + "due": "the end of Q1 2033" + }, + "intent": "Create an issue in cloud-to-butt repo with title \"Let's keep the project alive\". Assign the issue to myself. 
Set due date to be the end of Q1 2033", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/byteblaze/cloud-to-butt/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[data-qa-selector=\"title_content\"]').outerText", + "required_contents": { + "exact_match": "Let's keep the project alive" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": { + "must_include": [ + "Mar 31, 2033" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": { + "must_include": [ + "Byte Blaze" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 809, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "a11yproject", + "issue": "404 for many URLs", + "account": "myself", + "due": "2030-1-3" + }, + "intent": "Create an issue in a11yproject repo with title \"404 for many URLs\". Assign the issue to myself. 
Set due date to be 2030-1-3", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[data-qa-selector=\"title_content\"]').outerText", + "required_contents": { + "exact_match": "404 for many URLs" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": { + "must_include": [ + "Jan 3, 2030" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": { + "must_include": [ + "Byte Blaze" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 810, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} in {{repo}} repo to {{account}}.", + "instantiation_dict": { + "repo": "primer design guide", + "issue": "flash alert", + "account": "myself" + }, + "intent": "Assign the issue regarding flash alert in primer design guide repo to myself.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/dashboard/issues?scope=all&state=opened&assignee_username=byteblaze", + "locator": "", + "required_contents": { + "must_include": [ + "Add documentation on using Flash alerts in dialog components", + "Clarify usage of flash alert" + ] + } + } + ] + }, + "intent_template_id": 999 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 811, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + 
"intent_template": "Assign the issue regarding {{issue}} in {{repo}} to {{account}}.", + "instantiation_dict": { + "repo": "a11yproject", + "issue": 404, + "account": "myself" + }, + "intent": "Assign the issue regarding 404 in a11yproject to myself.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/dashboard/issues?scope=all&state=opened&assignee_username=byteblaze", + "locator": "", + "required_contents": { + "must_include": [ + "404s, bad host, timeouts, bad urls for URLs linked from website" + ] + } + } + ] + }, + "intent_template_id": 999 + } +] diff --git a/VAB-WebArena-Lite/config_files/wa/test_webarena.raw.json b/VAB-WebArena-Lite/config_files/wa/test_webarena.raw.json new file mode 100644 index 0000000..3f9f440 --- /dev/null +++ b/VAB-WebArena-Lite/config_files/wa/test_webarena.raw.json @@ -0,0 +1,29604 @@ +[ + { + "sites": [ + "shopping_admin" + ], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the top-{{n}} best-selling product in {{year}}", + "instantiation_dict": { + "n": 1, + "year": 2022 + }, + "intent": "What is the top-1 best-selling product in 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Quest Lumaflex\u2122 Band" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Quest Lumaflex\u2122 Band" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 1, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the top-{{n}} best-selling brand in {{period}}", + "instantiation_dict": { + "n": 1, 
+ "period": "Quarter 1 2022" + }, + "intent": "What is the top-1 best-selling brand in Quarter 1 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Sprite" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sprite" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 2, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the top-{{n}} best-selling product type in {{period}}", + "instantiation_dict": { + "n": 1, + "period": "Quarter 1 2022" + }, + "intent": "What is the top-1 best-selling product type in Quarter 1 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Yoga ball" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yoga ball" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 3, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the top-{{n}} best-selling product in {{year}}", + "instantiation_dict": { + "n": 2, + "year": 2022 + }, + "intent": "What are the top-2 best-selling product in 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Quest Lumaflex\u2122 Band", + "Sprite Stasis Ball 65 cm" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Quest Lumaflex\u2122 Band, Sprite Stasis Ball 65 cm" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 4, + "require_login": true, + "storage_state": 
"./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the top-{{n}} best-selling product in {{period}}", + "instantiation_dict": { + "n": 3, + "period": "Jan 2023" + }, + "intent": "What are the top-3 best-selling product in Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Impulse Duffle", + "Overnight Duffle", + "Hawkeye Yoga Short-32-Blue" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Impulse Duffle, Overnight Duffle, Hawkeye Yoga Short-32-Blue" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 5, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the top-{{n}} best-selling product type in {{period}}", + "instantiation_dict": { + "n": 1, + "period": "Jan 2023" + }, + "intent": "What is the top-1 best-selling product type in Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Duffle" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Duffle" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 6, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the top-{{n}} best-selling product in {{year}}", + "instantiation_dict": { + "n": 5, + "year": 2023 + }, + "intent": "What are the top-5 best-selling product in 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Sprite Yoga Strap 6 foot", + "Overnight Duffle", + 
"Ida Workout Parachute Pant-29-Purple", + "Hawkeye Yoga Short-32-Blue", + "Sprite Stasis Ball 65 cm" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sprite Yoga Strap 6 foot, Overnight Duffle, Ida Workout Parachute Pant-29-Purple, Hawkeye Yoga Short-32-Blue, Sprite Stasis Ball 65 cm" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "map" + ], + "task_id": 7, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}", + "instantiation_dict": { + "airport_type": "international airports", + "start": "Carnegie Mellon University", + "radius": "50 km" + }, + "intent": "Tell me the full address of all international airports that are within a driving distance of 50 km to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Pittsburgh International Airport, Southern Beltway, Findlay Township, Allegheny County, 15231, United States" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Pittsburgh International Airport People Movers, Airport Boulevard, Findlay Township, Allegheny County, Pennsylvania, 15231, United States" + }, + "intent_template_id": 79 + }, + { + "sites": [ + "map" + ], + "task_id": 8, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}", + "instantiation_dict": { + "airport_type": "international airports", + "start": "Carnegie Mellon University", + "radius": "5 km" + }, + "intent": "Tell me the full address of all international airports that are within a driving distance of 5 km to 
Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "There is no airport within 5 km of Carnegie Mellon University" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "There is no airport within 5 km of Carnegie Mellon University" + }, + "intent_template_id": 79 + }, + { + "sites": [ + "map" + ], + "task_id": 9, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}", + "instantiation_dict": { + "airport_type": "international airports", + "start": "Carnegie Art Museum", + "radius": "30 km" + }, + "intent": "Tell me the full address of all international airports that are within a driving distance of 30 km to Carnegie Art Museum", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Pittsburgh International Airport, Southern Beltway, Findlay Township, Allegheny County, 15231, United States" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Pittsburgh International Airport People Movers, Airport Boulevard, Findlay Township, Allegheny County, Pennsylvania, 15231, United States" + }, + "intent_template_id": 79 + }, + { + "sites": [ + "map" + ], + "task_id": 10, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}", + "instantiation_dict": { + "airport_type": "US international airports", + "start": "Niagara Falls", + "radius": "60 km" + }, + "intent": "Tell me the full address of all US international airports that are within a 
driving distance of 60 km to Niagara Falls", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Niagara Falls International Airport, 2035, Niagara Falls Boulevard, City of Niagara Falls, Town of Wheatfield, Niagara County, New York, 14304, United States", + "Buffalo-Niagara International Airport, Holtz Drive, Town of Cheektowaga, Erie County, New York, 14225, United States" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Niagara Falls International Airport, 2035, Niagara Falls Boulevard, City of Niagara Falls, Town of Wheatfield, Niagara County, New York, 14304, United States Buffalo-Niagara International Airport, South Youngs Road, Town of Cheektowaga, Erie County, New York, 14221, United States" + }, + "intent_template_id": 79 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 11, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "disappointed" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"disappointed\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "6" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "6" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 12, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": 
{ + "term": "satisfied" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"satisfied\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 13, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "decent" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"decent\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 14, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "not useful" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"not useful\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "shopping_admin" + ], + 
"task_id": 15, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "best" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"best\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "map" + ], + "task_id": 16, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Compare the time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "5000 Fifth Avenue, Pittsburgh", + "end": "UPMC family health center" + }, + "intent": "Compare the time for walking and driving route from 5000 Fifth Avenue, Pittsburgh to UPMC family health center", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 2min", + "walking: 16min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Driving: 2min. Walking: 16min." 
+ }, + "intent_template_id": 73 + }, + { + "sites": [ + "map" + ], + "task_id": 17, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Compare the time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "AMC Waterfront", + "end": "Carnegie Mellon University" + }, + "intent": "Compare the time for walking and driving route from AMC Waterfront to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 13min", + "walking: 1h 35min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "driving: 13min, walking: 1h 35min." + }, + "intent_template_id": 73 + }, + { + "sites": [ + "map" + ], + "task_id": 18, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Compare the time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "AMC Waterfront", + "end": "Univ of Pittsburgh" + }, + "intent": "Compare the time for walking and driving route from AMC Waterfront to Univ of Pittsburgh", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 15min", + "walking: 1h 47min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "driving: 15min, walking: 1h 47min." 
+ }, + "intent_template_id": 73 + }, + { + "sites": [ + "map" + ], + "task_id": 19, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Compare the time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "Carnegie Science Center", + "end": "Carnegie Mellon University" + }, + "intent": "Compare the time for walking and driving route from Carnegie Science Center to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 12min", + "walking: 1h 44min." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "driving: 12min, walking: 1h 44min." + }, + "intent_template_id": 73 + }, + { + "sites": [ + "map" + ], + "task_id": 20, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Compare the difference in time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "Randyland", + "end": "Carnegie Mellon University" + }, + "intent": "Compare the difference in time for walking and driving route from Randyland to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 13min", + "walking: 1h 45min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "driving: 13min, walking: 1h 45min." 
+ }, + "intent_template_id": 73 + }, + { + "sites": [ + "shopping" + ], + "task_id": 21, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/6s-wireless-headphones-over-ear-noise-canceling-hi-fi-bass-foldable-stereo-wireless-kid-headsets-earbuds-with-built-in-mic-micro-sd-tf-fm-for-iphone-samsung-ipad-pc-black-gold.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "ear cups being small" + }, + "intent": "List out reviewers, if exist, who mention about ear cups being small", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Joseph Brzezinski", + "Catso", + "Dibbins", + "Anglebert Dinkherhump", + "Michelle Davis" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Joseph Brzezinski, Catso, Dibbins, Anglebert Dinkherhump, Michelle Davis" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 22, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/fujifilm-finepix-z200fd-10mp-digital-camera-with-5x-optical-dual-image-stabilized-zoom-black.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "under water photo" + }, + "intent": "List out reviewers, if exist, who mention about under water photo", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no review about under water photo", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 23, + "require_login": true, + 
"storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/3-pack-samsung-galaxy-s6-screen-protector-nearpow-tempered-glass-screen-protector-with-9h-hardness-crystal-clear-easy-bubble-free-installation-scratch-resist.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "good fingerprint resistant" + }, + "intent": "List out reviewers, if exist, who mention about good fingerprint resistant", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Rachel", + "T. Gannon" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Rachel, T. Gannon, " + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 24, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/haflinger-men-s-wool-felt-open-back-slippers-beige-550-peat-us-7.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "price being unfair" + }, + "intent": "List out reviewers, if exist, who mention about price being unfair", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no reivew about price being unfair", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 25, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/epson-workforce-wf-3620-wifi-direct-all-in-one-color-inkjet-printer-copier-scanner-amazon-dash-replenishment-ready.html", + "geolocation": null, + "intent_template": "List out reviewers, if 
exist, who mention about {{description}}", + "instantiation_dict": { + "description": "average print quality" + }, + "intent": "List out reviewers, if exist, who mention about average print quality", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Goldfish", + "Roxanne Brandon Coffey" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "GoldfishGoldfish, Roxanne Brandon Coffey" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 26, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/epson-workforce-wf-3620-wifi-direct-all-in-one-color-inkjet-printer-copier-scanner-amazon-dash-replenishment-ready.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "complain of the customer service" + }, + "intent": "List out reviewers, if exist, who mention about complain of the customer service", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Bob in Vegas", + "RemyR" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Bob in Vegas, RemyRRemyR" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "reddit" + ], + "task_id": 27, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "Showerthoughts" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the Showerthoughts 
forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "reddit" + ], + "task_id": 28, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "Worcester" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the Worcester forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "reddit" + ], + "task_id": 29, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "DIY" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the DIY forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "reddit" + ], + "task_id": 30, + 
"require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "space" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the space forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "reddit" + ], + "task_id": 31, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "photoshopbattles" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the photoshopbattles forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "map" + ], + "task_id": 32, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I will arrive {{place}} soon. Provide the name of a {{target1}} in the vicinity, if available. 
Then, tell me the {{information}} to {{target2}} from the hotel.", + "instantiation_dict": { + "place": "Pittsburgh Airport", + "information": "the walking distance", + "target1": "Hilton hotel", + "target2": "the nearest supermarket own by a local company" + }, + "intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hilton hotel in the vicinity, if available. Then, tell me the the walking distance to the nearest supermarket own by a local company from the hotel.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "DoubleTree by Hilton Hotel Pittsburgh Airport", + "2.0km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "DoubleTree by Hilton Hotel Pittsburgh Airport Distance: 2.0km" + }, + "intent_template_id": 78 + }, + { + "sites": [ + "map" + ], + "task_id": 33, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I will arrive {{place}} soon. Provide the name of a {{target1}} in the vicinity, if available. Then, tell me the {{information}} to {{target2}} from the hotel.", + "instantiation_dict": { + "place": "Pittsburgh Airport", + "target1": "Hilton hotel", + "information": "the shortest walking distance", + "target2": "a supermarket" + }, + "intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hilton hotel in the vicinity, if available. 
Then, tell me the the shortest walking distance to a supermarket from the hotel.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "DoubleTree by Hilton Hotel Pittsburgh Airport", + "1.4km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "DoubleTree by Hilton Hotel Pittsburgh Airport Distance: 1.4km" + }, + "intent_template_id": 78 + }, + { + "sites": [ + "map" + ], + "task_id": 34, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I will arrive {{place}} soon. Provide the name of a {{target1}} in the vicinity, if available. Then, tell me the {{information}} to {{target2}} from the hotel.", + "instantiation_dict": { + "place": "Pittsburgh Airport", + "target1": "Hyatt hotel", + "information": "the shortest walking time", + "target2": "a supermarket" + }, + "intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hyatt hotel in the vicinity, if available. Then, tell me the the shortest walking time to a supermarket from the hotel.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hyatt Regency Pittsburgh International Airport" + ], + "fuzzy_match": [ + "Time: 3h 30min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hyatt Regency Pittsburgh International Airport\n3:30" + }, + "intent_template_id": 78 + }, + { + "sites": [ + "map" + ], + "task_id": 35, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I will arrive {{place}} soon. Provide the name of a {{target1}} in the vicinity, if available. 
Then, tell me the {{information}} to {{target2}} from the hotel.", + "instantiation_dict": { + "place": "Pittsburgh Airport", + "target1": "Hyatt hotel", + "information": "the minimal driving time", + "target2": "a supermarket" + }, + "intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hyatt hotel in the vicinity, if available. Then, tell me the the minimal driving time to a supermarket from the hotel.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hyatt Regency Pittsburgh International Airport" + ], + "fuzzy_match": [ + "Time: 15min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hyatt Regency Pittsburgh International Airport Time: 15min" + }, + "intent_template_id": 78 + }, + { + "sites": [ + "map" + ], + "task_id": 36, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "social security administration", + "location": "Carnegie Mellon University" + }, + "intent": "Check if the social security administration in pittsburgh can be reached in one hour by car from Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": ["Yes"] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "map" + ], + "task_id": 37, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "police station", + "location": "gates building at 
CMU" + }, + "intent": "Check if the police station in pittsburgh can be reached in one hour by car from gates building at CMU", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": ["Yes"] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "map" + ], + "task_id": 38, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "duquesne university", + "location": "pittsburgh airport" + }, + "intent": "Check if the duquesne university in pittsburgh can be reached in one hour by car from pittsburgh airport", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": ["Yes"] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "map" + ], + "task_id": 39, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "walmart", + "location": "5600 fifth avenue" + }, + "intent": "Check if the walmart in pittsburgh can be reached in one hour by car from 5600 fifth avenue", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": ["Yes"] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "map" + ], + "task_id": 40, + "require_login": true, + "storage_state": null, + 
"start_url": "__MAP__", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "amc theatre", + "location": "hobart street" + }, + "intent": "Check if the amc theatre in pittsburgh can be reached in one hour by car from hobart street", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": ["Yes"] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 41, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "List the top {{n}} search terms in my store", + "instantiation_dict": { + "n": "1" + }, + "intent": "List the top 1 search terms in my store", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "hollister" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "hollister" + }, + "intent_template_id": 285 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 42, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "List the top {{n}} search terms in my store", + "instantiation_dict": { + "n": "2" + }, + "intent": "List the top 2 search terms in my store", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "hollister", + "Joust Bag" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "hollister, Joust Bag" + }, + "intent_template_id": 285 + }, + { + "sites": [ + 
"shopping_admin" + ], + "task_id": 43, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "List the top {{n}} search terms in my store", + "instantiation_dict": { + "n": "3" + }, + "intent": "List the top 3 search terms in my store", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "hollister", + "Joust Bag", + "Antonia Racer Tank" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "hollister, Joust Bag, Antonia Race Tank" + }, + "intent_template_id": 285 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 44, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Check out my todos", + "instantiation_dict": {}, + "intent": "Check out my todos", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/dashboard/todos", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 303 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 45, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "Check out the most recent open issues", + "instantiation_dict": {}, + "intent": "Check out the most recent open issues", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues/?sort=created_asc&state=opened", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 300 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 46, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + 
"start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Check out the most recent open issues", + "instantiation_dict": {}, + "intent": "Check out the most recent open issues", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/issues/?sort=created_date&state=opened", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 300 + }, + { + "sites": [ + "shopping" + ], + "task_id": 47, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past month" + }, + "intent": "Today is 6/12/2023. Tell me how many fulfilled orders I have over the past month, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "0 order", + "$0 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0 order, $0 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "shopping" + ], + "task_id": 48, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past three days" + }, + "intent": "Today is 6/12/2023. 
Tell me how many fulfilled orders I have over the past three days, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "0 order", + "$0 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0 order, $0 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "shopping" + ], + "task_id": 49, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past four month" + }, + "intent": "Today is 6/12/2023. Tell me how many fulfilled orders I have over the past four month, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3 orders", + "$845.49 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3 orders, $845.49 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "shopping" + ], + "task_id": 50, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past year" + }, + "intent": "Today is 6/12/2023. 
Tell me how many fulfilled orders I have over the past year, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "24 orders", + "$6560.69 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "24 orders, $6560.69 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "shopping" + ], + "task_id": 51, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past six month" + }, + "intent": "Today is 6/12/2023. Tell me how many fulfilled orders I have over the past six month, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "12 orders", + "$1603.69 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "12 orders, $1603.69 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "map" + ], + "task_id": 52, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "Carnegie Mellon University", + "end": "starbucks on Craig Street" + }, + "intent": "How long does it take to walk from Carnegie Mellon University to starbucks on Craig Street?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "7 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + 
"reference_answer_raw_annotation": "7 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 53, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "Univ of Pittsburgh", + "end": "starbucks on Craig Street" + }, + "intent": "How long does it take to walk from Univ of Pittsburgh to starbucks on Craig Street?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "18 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "18 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 54, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "Carnegie Mellon University", + "end": "Univ of Pittsburgh" + }, + "intent": "How long does it take to walk from Carnegie Mellon University to Univ of Pittsburgh?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "25 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "25 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 55, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "the starbuck near CMU", + "end": "Chatham university" + }, + "intent": "How long does it take to walk from the starbuck near CMU to Chatham university?", + "require_reset": false, + "eval": { + "eval_types": [ + 
"string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "30 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "30 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 56, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "Carnegie Museum of Art", + "end": "a library at CMU" + }, + "intent": "How long does it take to walk from Carnegie Museum of Art to a library at CMU?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "11 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "11 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 57, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "restaurant", + "place2": "university center at Carnegie Mellon University" + }, + "intent": "Tell me the closest restaurant(s) to university center at Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "El Gallo de Oro", + "Back Bar Grill", + "Grano", + "Beefsteak", + "Nourish", + "Schatz Dining Room", + "Au Bon Pain" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "El Gallo de Oro, Back Bar Grill, Grano, Beefsteak, Nourish, Schatz Dining Room, Au Bon Pain" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "map" + ], + "task_id": 58, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + 
"geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "cafe", + "place2": "CMU Hunt library" + }, + "intent": "Tell me the closest cafe(s) to CMU Hunt library", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "De Fer Coffee & Tea" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "De Fer Coffee & Tea" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "map" + ], + "task_id": 59, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "restaurant", + "place2": "CMU Hunt library" + }, + "intent": "Tell me the closest restaurant(s) to CMU Hunt library", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The exchange" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "The exchange" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "map" + ], + "task_id": 60, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "restaurant", + "place2": "CMU Posner Hall" + }, + "intent": "Tell me the closest restaurant(s) to CMU Posner Hall", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The exchange" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "The exchange" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "map" + ], + "task_id": 61, + "require_login": true, + 
"storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "restaurant", + "place2": "CMU Sorrells Library" + }, + "intent": "Tell me the closest restaurant(s) to CMU Sorrells Library", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "La Prima Espresso" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "La Prima Espresso" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 62, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Which customer has completed the {{quantifier}} number of orders in the entire history?", + "instantiation_dict": { + "quantifier": "most" + }, + "intent": "Which customer has completed the most number of orders in the entire history?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Jane Smith" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Jane Smith" + }, + "intent_template_id": 276 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 63, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Which customer(s) has completed the {{quantifier}} number of orders in the entire history?", + "instantiation_dict": { + "quantifier": "second most" + }, + "intent": "Which customer(s) has completed the second most number of orders in the entire history?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Adam Garcia", 
+ "Michael Nguyen", + "Sarah Miller" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Adam Garcia, Michael Nguyen, Sarah Miller" + }, + "intent_template_id": 276 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 64, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Which customer has placed {{number}} orders in the entire history?", + "instantiation_dict": { + "number": "2" + }, + "intent": "Which customer has placed 2 orders in the entire history?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lisa Kim", + "Lisa Green", + "Julia Williams", + "Brian Smith", + "Alexander Thomas" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lisa Kim, Lisa Green, Julia Williams, Brian Smith, Alexander Thomas" + }, + "intent_template_id": 276 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 65, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Which customer has completed the {{quantifier}} number of orders in the entire history?", + "instantiation_dict": { + "quantifier": "fifth most" + }, + "intent": "Which customer has completed the fifth most number of orders in the entire history?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Jane Doe" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Jane Doe" + }, + "intent_template_id": 276 + }, + { + "sites": [ + "reddit" + ], + "task_id": 66, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + 
"geolocation": null, + "intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}", + "instantiation_dict": { + "number": 10, + "subreddit": "books", + "description": "show me the post URLs that recommand a single book" + }, + "intent": "Among the top 10 post in \"books\" forum, show me the post URLs that recommand a single book", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "http://www.reddit.com/f/books/59396/apple-books-has-a-free-audiobook-of-a-christmas-carol", + "http://www.reddit.com/f/books/17445/i-just-finished-reading-the-hobbit-to-my-6-year-old-daughter" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__REDDIT__/f/books/59396/apple-books-has-a-free-audiobook-of-a-christmas-carol, __REDDIT__/f/books/17445/i-just-finished-reading-the-hobbit-to-my-6-year-old-daughter" + }, + "intent_template_id": 17 + }, + { + "sites": [ + "reddit" + ], + "task_id": 67, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}", + "instantiation_dict": { + "number": 10, + "subreddit": "books", + "description": "show me the book names from posts that recommand a single book" + }, + "intent": "Among the top 10 post in \"books\" forum, show me the book names from posts that recommand a single book", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "A Christmas Carol", + "The Hobbit" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "A Christmas Carol, The Hobbit" + }, + "intent_template_id": 17 + }, + { + "sites": [ + "reddit" + ], + "task_id": 68, + "require_login": true, + "storage_state": 
"./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}", + "instantiation_dict": { + "number": 10, + "subreddit": "books", + "description": "show me the author name and the book name from posts that recommand a single book" + }, + "intent": "Among the top 10 post in \"books\" forum, show me the author name and the book name from posts that recommand a single book", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "A Christmas Carol", + "Levar Burton", + "The Hobbit", + "J. R. R. Tolkien" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "A Christmas Carol by Levar Burton: , The Hobbit by J. R. R. Tolkien" + }, + "intent_template_id": 17 + }, + { + "sites": [ + "reddit" + ], + "task_id": 69, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}", + "instantiation_dict": { + "number": 10, + "subreddit": "books", + "description": "is there any post talks about supporting local book stores? If so, tell me the organizations involved" + }, + "intent": "Among the top 10 post in \"books\" forum, is there any post talks about supporting local book stores? 
If so, tell me the organizations involved", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "bookshop.org" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "bookshop.org" + }, + "intent_template_id": 17 + }, + { + "sites": [ + "map" + ], + "task_id": 70, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the zip code of {{place}}?", + "instantiation_dict": { + "place": "Carnegie Mellon University" + }, + "intent": "What is the zip code of Carnegie Mellon University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "15213" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "15213" + }, + "intent_template_id": 70 + }, + { + "sites": [ + "map" + ], + "task_id": 71, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the zip code of {{place}}?", + "instantiation_dict": { + "place": "Chatham University" + }, + "intent": "What is the zip code of Chatham University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "15232" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "15232" + }, + "intent_template_id": 70 + }, + { + "sites": [ + "map" + ], + "task_id": 72, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the zip code of {{place}}?", + "instantiation_dict": { + "place": "Yale University" + }, + "intent": "What is the zip code of Yale University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + 
"reference_answers": { + "exact_match": "06516" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "06516" + }, + "intent_template_id": 70 + }, + { + "sites": [ + "map" + ], + "task_id": 73, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the zip code of {{place}}?", + "instantiation_dict": { + "place": "Columbia University" + }, + "intent": "What is the zip code of Columbia University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "10027" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "10027" + }, + "intent_template_id": 70 + }, + { + "sites": [ + "map" + ], + "task_id": 74, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Given the following locations, {{place_list}}, what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.", + "instantiation_dict": { + "place_list": [ + "Carnegie Mellon University", + "apple store shadyside", + "starbucks on craig street" + ] + }, + "intent": "Given the following locations, ['Carnegie Mellon University', 'apple store shadyside', 'starbucks on craig street'], what would be the optimal route to travel through them all in order to minimize total travel time? 
Please note the journey begins at the first place listed.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "The order is Carnegie Mellon University, starbucks on forbes ave, apple store shadyside" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Carnegie Mellon University, starbucks on forbes ave, apple store shadyside" + }, + "intent_template_id": 65 + }, + { + "sites": [ + "map" + ], + "task_id": 75, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Given the following locations, {{place_list}}, what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.", + "instantiation_dict": { + "place_list": [ + "Massachusetts Institute of Technology", + "Harvard University", + "Boston Logan International Airport" + ] + }, + "intent": "Given the following locations, ['Massachusetts Institute of Technology', 'Harvard University', 'Boston Logan International Airport'], what would be the optimal route to travel through them all in order to minimize total travel time? 
Please note the journey begins at the first place listed.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "The order is Massachusetts Institute of Technology, Harvard University, Boston Logan International Airport" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Massachusetts Institute of Technology, Harvard University, Boston Logan International Airport" + }, + "intent_template_id": 65 + }, + { + "sites": [ + "map" + ], + "task_id": 76, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Given the following locations, {{place_list}}, what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.", + "instantiation_dict": { + "place_list": [ + "Princeton University", + "Yale University", + "Harvard University" + ] + }, + "intent": "Given the following locations, ['Princeton University', 'Yale University', 'Harvard University'], what would be the optimal route to travel through them all in order to minimize total travel time? 
Please note the journey begins at the first place listed.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The order is Princeton University, Yale University, Harvard University" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Princeton University, Yale University, Harvard University" + }, + "intent_template_id": 65 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 77, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the total count of {{status}} reviews amongst all the reviews?", + "instantiation_dict": { + "status": "Pending" + }, + "intent": "What is the total count of Pending reviews amongst all the reviews?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "5" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5" + }, + "intent_template_id": 277 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 78, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the total count of {{status}} reviews amongst all the reviews?", + "instantiation_dict": { + "status": "Approved" + }, + "intent": "What is the total count of Approved reviews amongst all the reviews?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "346" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "346" + }, + "intent_template_id": 277 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 79, + "require_login": true, + 
"storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the total count of {{status}} reviews amongst all the reviews?", + "instantiation_dict": { + "status": "Not Approved" + }, + "intent": "What is the total count of Not Approved reviews amongst all the reviews?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 277 + }, + { + "sites": [ + "map" + ], + "task_id": 80, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?", + "instantiation_dict": { + "place_A": "Carnegie Mellon University", + "place_B": "Starbucks on Craig Street", + "place_C": "Pittsburgh International Airport" + }, + "intent": "What is the duration required to first walk from Carnegie Mellon University to Starbucks on Craig Street, and then drive to Pittsburgh International Airport?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "38 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "38 min" + }, + "intent_template_id": 72 + }, + { + "sites": [ + "map" + ], + "task_id": 81, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?", + "instantiation_dict": { + "place_A": "Univ of Pittsburgh", + "place_B": "starbucks on Craig Street", + "place_C": "Pittsburgh International Airport" + }, + "intent": "What is 
the duration required to first walk from Univ of Pittsburgh to starbucks on Craig Street, and then drive to Pittsburgh International Airport?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "49 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "49 min" + }, + "intent_template_id": 72 + }, + { + "sites": [ + "map" + ], + "task_id": 82, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?", + "instantiation_dict": { + "place_A": "Massachusetts Institute of Technology", + "place_B": "Harvard University", + "place_C": "Boston Logan International Airport" + }, + "intent": "What is the duration required to first walk from Massachusetts Institute of Technology to Harvard University, and then drive to Boston Logan International Airport?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "63 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "63 min" + }, + "intent_template_id": 72 + }, + { + "sites": [ + "map" + ], + "task_id": 83, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?", + "instantiation_dict": { + "place_A": "Carnegie Mellon University", + "place_B": "apple store shadyside", + "place_C": "starbucks on craig street" + }, + "intent": "What is the duration required to first walk from Carnegie Mellon University to apple store shadyside, and then drive to starbucks on craig street?", + "require_reset": false, + "eval": { + "eval_types": 
[ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "22 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "22 min" + }, + "intent_template_id": 72 + }, + { + "sites": [ + "map" + ], + "task_id": 84, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "DoubleTree by Hilton New York Downtown", + "place": "Keens Steakhouse" + }, + "intent": "From my stay at DoubleTree by Hilton New York Downtown, what's the estimated driving time to reach Keens Steakhouse?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "14 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "14 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 85, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "La Quinta Inn near the airport", + "place": "Carnegie Mellon University" + }, + "intent": "From my stay at La Quinta Inn near the airport, what's the estimated driving time to reach Carnegie Mellon University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "30 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "30 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 86, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": 
"From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "La Quinta Inn near the airport", + "place": "Upitt" + }, + "intent": "From my stay at La Quinta Inn near the airport, what's the estimated driving time to reach Upitt?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "29 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "29 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 87, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "red roof inn", + "place": "Pittsburgh science museum" + }, + "intent": "From my stay at red roof inn, what's the estimated driving time to reach Pittsburgh science museum?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "20 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "20 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 88, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "Homewood Suites Southpointe", + "place": "PPG Paints Arena" + }, + "intent": "From my stay at Homewood Suites Southpointe, what's the estimated driving time to reach PPG Paints Arena?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "34 minutes" + ] + }, + "reference_url": "", + 
"program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "34 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 89, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "Connecticut" + }, + "intent": "Which US states border Connecticut?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Rhode Island", + "Massachusetts", + "New York" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Rhode Island, Massachusetts, New York" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "map" + ], + "task_id": 90, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "Pennsylvania" + }, + "intent": "Which US states border Pennsylvania?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Ohio", + "Maryland", + "New York", + "New Jersey", + "Delaware", + "West Virginia" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Ohio, Maryland, New York, New Jersey, Delaware, West Virginia" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "map" + ], + "task_id": 91, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "Massachusetts" + }, + "intent": "Which US states border Massachusetts?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Rhode Island", + 
"Connecticut", + "New York", + "New Hampshire", + "Vermont" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Rhode Island, Connecticut, New York, New Hampshire, Vermont" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "map" + ], + "task_id": 92, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "Vermont" + }, + "intent": "Which US states border Vermont?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "New York", + "New Hampshire", + "Massachusetts" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "New York, New Hampshire, Massachusetts" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "map" + ], + "task_id": 93, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "New Hampshire" + }, + "intent": "Which US states border New Hampshire?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Massachusetts", + "Vermont", + "Maine" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Massachusetts, Vermont, Maine" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 94, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Telll me the grand total of invoice {{id}}.", + "instantiation_dict": { + "id": "000000001" + }, + "intent": "Telll me the grand total of invoice 000000001.", + "require_reset": 
false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "36.39" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$36.39" + }, + "intent_template_id": 274 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 95, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Telll me the grand total of invoice {{id}}.", + "instantiation_dict": { + "id": "000000002" + }, + "intent": "Telll me the grand total of invoice 000000002.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "39.64" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$39.64" + }, + "intent_template_id": 274 + }, + { + "sites": [ + "shopping" + ], + "task_id": 96, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me the status of my latest order and when will it arrive", + "instantiation_dict": {}, + "intent": "Tell me the status of my latest order and when will it arrive", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "The last order was canceled. It will never arrive." + ] + }, + "reference_url": "", + "program_html": [], + "reference_answer_raw_annotation": "The last order was canceled. 
It will never arrive.", + "string_note": "" + }, + "intent_template_id": 193 + }, + { + "sites": [ + "map", + "wikipedia" + ], + "task_id": 97, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the distance to drive from Carnegie Mellon University to the top computer science school in massachusetts", + "instantiation_dict": {}, + "intent": "Tell me the distance to drive from Carnegie Mellon University to the top computer science school in massachusetts", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "914km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "914 km" + }, + "intent_template_id": 120 + }, + { + "sites": [ + "map" + ], + "task_id": 98, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Where is the nearest {{places}} to {{start}}, and what is the walking distance to it?", + "instantiation_dict": { + "places": "tea cafe", + "start": "University of Pittsburgh" + }, + "intent": "Where is the nearest tea cafe to University of Pittsburgh, and what is the walking distance to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Fuku Tea", + "3716", + "Forbes Avenue", + "Central Oakland", + "Pittsburgh", + "653m" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Fuku Tea, 3716, Forbes Avenue, Oakland, Central Oakland, Pittsburgh, Allegheny County, Pennsylvania, 15213, United States\n653m" + }, + "intent_template_id": 66 + }, + { + "sites": [ + "map" + ], + "task_id": 99, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Where is the nearest {{places}} to {{start}}, 
and what is the walking distance to it?", + "instantiation_dict": { + "places": "Five Guys", + "start": "5700 Penn Ave" + }, + "intent": "Where is the nearest Five Guys to 5700 Penn Ave, and what is the walking distance to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Five Guys", + "117", + "South Bouquet Street", + "North Oakland", + "Pittsburgh", + "4.0km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Five Guys, 117, South Bouquet Street, Oakland, North Oakland, Pittsburgh, Allegheny County, Pennsylvania, 15213, United States\n4.0km" + }, + "intent_template_id": 66 + }, + { + "sites": [ + "map" + ], + "task_id": 100, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Where is the nearest {{places}} to {{start}}, and what is the walking distance to it?", + "instantiation_dict": { + "places": "Starbucks", + "start": "Carnegie Mellon" + }, + "intent": "Where is the nearest Starbucks to Carnegie Mellon, and what is the walking distance to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Starbucks", + "417", + "South Craig Street", + "Bellefield", + "Pittsburgh", + "557m" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Starbucks, 417, South Craig Street, Bellefield, Pittsburgh, Allegheny County, Pennsylvania, 15213, United States\n557m" + }, + "intent_template_id": 66 + }, + { + "sites": [ + "map" + ], + "task_id": 101, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Where is the nearest {{places}} to {{start}}, and what is the walking distance to it?", + "instantiation_dict": { + "places": "In-N-Out", + "start": "Upitts" + }, + 
"intent": "Where is the nearest In-N-Out to Upitts, and what is the walking distance to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no In-N-Out near University of Pittsburgh", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 66 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 102, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "help needed", + "repo": "a11yproject/a11yproject.com" + }, + "intent": "Display the list of issues in the a11yproject/a11yproject.com repository that have labels related to help needed", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/a11y-syntax-highlighting/-/issues/?label_name%5B%5D=help%20wanted", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 103, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "questions", + "repo": "kkroening/ffmpeg-python" + }, + "intent": "Display the list of issues in the kkroening/ffmpeg-python repository that have labels related to questions", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/kkroening/ffmpeg-python/-/issues/?label_name%5B%5D=question", + "program_html": [], + "url_note": "GOLD in PRED" + }, + 
"intent_template_id": 349 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 104, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "flaky-test", + "repo": "keycloak/keycloak" + }, + "intent": "Display the list of issues in the keycloak/keycloak repository that have labels related to flaky-test", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/keycloak/keycloak/-/issues/?label_name%5B%5D=flaky-test", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 105, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "OpenAPI Generator CLI", + "repo": "OpenAPITools/openapi-generator" + }, + "intent": "Display the list of issues in the OpenAPITools/openapi-generator repository that have labels related to OpenAPI Generator CLI", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/OpenAPITools/openapi-generator/-/issues/?label_name%5B%5D=OpenAPI%20Generator%20CLI", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 106, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "BUG", + 
"repo": "umano/AndroidSlidingUpPanel" + }, + "intent": "Display the list of issues in the umano/AndroidSlidingUpPanel repository that have labels related to BUG", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/umano/AndroidSlidingUpPanel/-/issues/?label_name%5B%5D=BUG", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 107, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "from May to December 2022" + }, + "intent": "Presents the monthly count of successful orders from May to December 2022 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "May: 8 orders", + "June: 13 orders", + "July: 9 orders", + "August: 8 orders", + "Sepetember: 10 orders", + "October: 4 orders", + "November: 5 orders", + "December: 10 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "May: 8 orders June: 13 orders July: 9 orders August: 8 orders Sepetember: 10 orders Octorbor: 4 orders November: 5 orders December: 10 orders " + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 108, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "01/2023-05/2023" + }, + "intent": "Presents the monthly count of successful orders 01/2023-05/2023 in MM:COUNT format", + 
"require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January: 12 orders", + "Feburary: 7 orders", + "March: 5 orders", + "April: 9 orders", + "May: 5 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January: 12 orders Febulary: 7 orders March: 5 orders Apirl: 9 orders May: 5 orders" + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 109, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "from Jan to December 2022" + }, + "intent": "Presents the monthly count of successful orders from Jan to December 2022 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January: 11 orders", + "Feburary: 16 orders", + "March: 14 orders", + "April: 7 orders", + "May: 8 orders", + "June: 13 orders", + "July: 9 orders", + "August: 8 orders", + "Sepetember: 10 orders", + "Octorbor: 4 orders", + "November: 5 orders", + "December: 10 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January: 11 orders Feburary: 16 orders March: 14 orders April: 7 orders May: 8 orders June: 13 orders July: 9 orders August: 8 orders Sepetember: 10 orders Octorbor: 4 orders November: 5 orders December: 10 orders " + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 110, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT 
format", + "instantiation_dict": { + "period": "from Jan to Nov 2022" + }, + "intent": "Presents the monthly count of successful orders from Jan to Nov 2022 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January: 11 orders", + "Feburary: 16 orders", + "March: 14 orders", + "April: 7 orders", + "May: 8 orders", + "June: 13 orders", + "July: 9 orders", + "August: 8 orders", + "Sepetember: 10 orders", + "Octorbor: 4 orders", + "November: 5 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January: 11 orders Feburary: 16 orders March: 14 orders April: 7 orders May: 8 orders June: 13 orders July: 9 orders August: 8 orders Sepetember: 10 orders Octorbor: 4 orders November: 5 orders " + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 111, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "from Feb to Nov 2022" + }, + "intent": "Presents the monthly count of successful orders from Feb to Nov 2022 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Feburary: 16 orders", + "March: 14 orders", + "April: 7 orders", + "May: 8 orders", + "June: 13 orders", + "July: 9 orders", + "August: 8 orders", + "Sepetember: 10 orders", + "Octorbor: 4 orders", + "November: 5 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Feburary: 16 orders March: 14 orders April: 7 orders May: 8 orders June: 13 orders July: 9 orders August: 8 orders Sepetember: 10 orders Octorbor: 4 orders November: 5 orders 
" + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 112, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the customers who have expressed dissatisfaction with {{product}}?", + "instantiation_dict": { + "product": "Circe fleece" + }, + "intent": "Show me the customers who have expressed dissatisfaction with Circe fleece?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Hannah Lim" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hannah Lim" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 113, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the customers who have expressed dissatisfaction with {{product}}?", + "instantiation_dict": { + "product": "Olivia zip jacket" + }, + "intent": "Show me the customers who have expressed dissatisfaction with Olivia zip jacket?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Emma Lopez", + "Seam Miller" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Emma Lopez, Seam Miller" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 114, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the customers who have expressed dissatisfaction with {{product}}?", + "instantiation_dict": { + "product": "Antonia racer tank" + }, + "intent": "Show me the customers who have expressed 
dissatisfaction with Antonia racer tank?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Shaunte", + "Merrie" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Shaunte, Merrie" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 115, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the name of the customers who have expressed dissatisfaction with {{product}}", + "instantiation_dict": { + "product": "Chloe tank" + }, + "intent": "Show me the name of the customers who have expressed dissatisfaction with Chloe tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no negative review for Chloe tank", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 116, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the name of the customers who have expressed dissatisfaction with {{product}}?", + "instantiation_dict": { + "product": "tanks products" + }, + "intent": "Show me the name of the customers who have expressed dissatisfaction with tanks products?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Alexander", + "Carma", + "Dominic", + "Merrie", + "Monroe", + "Scotty", + "Shaunte", + "Teofila", + "Valorie" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Alexander, Carma, Dominic, 
Merrie, Monroe, Scotty, Shaunte, Teofila, Valorie" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping" + ], + "task_id": 117, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the date when I made my first purchase on this site?", + "instantiation_dict": {}, + "intent": "What is the date when I made my first purchase on this site?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3/2/22" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3/2/22" + }, + "intent_template_id": 161 + }, + { + "sites": [ + "shopping" + ], + "task_id": 118, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I have jaw bruxism problem, show me something that could alleviate the problem.", + "instantiation_dict": {}, + "intent": "I have jaw bruxism problem, show me something that could alleviate the problem.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "", + "required_contents": { + "must_include": [ + "jaw bruxism", + "mouth guard" + ] + } + } + ] + }, + "intent_template_id": 151 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 119, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Antonia Racer Tank" + }, + "intent": "Tell me the reasons why customers like Antonia Racer Tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { 
+ "fuzzy_match": [ + "Its color and style is good" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Its color and style is good" + }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 120, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Ana Running Short" + }, + "intent": "Tell me the reasons why customers like Ana Running Short", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "It is comfortable" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "It is comfortable" + }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 121, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Circe hooded fleece" + }, + "intent": "Tell me the reasons why customers like Circe hooded fleece", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Warm and comfortable. True to size." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Warm and comfortable. True to size." 
+ }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 122, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Olivia zip jacket" + }, + "intent": "Tell me the reasons why customers like Olivia zip jacket", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Lightweight, comfortable and stylish. Good design and details." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lightweight, comfortable, and stylish. Good design and details." + }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 123, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Circe's products" + }, + "intent": "Tell me the reasons why customers like Circe's products", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Warm and comfortable. True to size." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Warm and comfortable. True to size." 
+ }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping" + ], + "task_id": 124, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range of {{product}} in the One Stop Market?", + "instantiation_dict": { + "product": "wireless earphone" + }, + "intent": "What is the price range of wireless earphone in the One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0.14", + "745.00" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$0.14 - $745.00" + }, + "intent_template_id": 159 + }, + { + "sites": [ + "shopping" + ], + "task_id": 125, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range of {{product}} in the One Stop Market?", + "instantiation_dict": { + "product": "teeth grinding mouth guard" + }, + "intent": "What is the price range of teeth grinding mouth guard in the One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1.46", + "85" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$1.46 - $85" + }, + "intent_template_id": 159 + }, + { + "sites": [ + "shopping" + ], + "task_id": 126, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range of {{product}} in the One Stop Market?", + "instantiation_dict": { + "product": "Canon photo printer" + }, + "intent": "What is the price range of Canon photo printer in the One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" 
+ ], + "reference_answers": { + "must_include": [ + "2.56", + "649.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$2.56 - $649.99" + }, + "intent_template_id": 159 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 127, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What brands appear most frequently among the top search terms?", + "instantiation_dict": {}, + "intent": "What brands appear most frequently among the top search terms?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hollister", + "Joust", + "Antonia" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hollister, Joust, Antonia" + }, + "intent_template_id": 1001 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 128, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What's the total number of items sold in the most recent {{k}} orders?", + "instantiation_dict": { + "k": "2" + }, + "intent": "What's the total number of items sold in the most recent 2 orders?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "9" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "9" + }, + "intent_template_id": 1002 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 129, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What's the total number of items sold in the most recent {{k}} orders?", + "instantiation_dict": { + 
"k": "4" + }, + "intent": "What's the total number of items sold in the most recent 4 orders?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "16" + }, + "intent_template_id": 1002 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 130, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What's the total number of items sold in the most recent {{k}} orders?", + "instantiation_dict": { + "k": "5" + }, + "intent": "What's the total number of items sold in the most recent 5 orders?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "18" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "18" + }, + "intent_template_id": 1002 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 131, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What's the total number of items sold in the most recent {{k}} orders?", + "instantiation_dict": { + "k": "7" + }, + "intent": "What's the total number of items sold in the most recent 7 orders?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "25" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "25" + }, + "intent_template_id": 1002 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 132, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "How many 
commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "kilian", + "repo": "a11yproject", + "date": "3/5/2023" + }, + "intent": "How many commits did kilian make to a11yproject on 3/5/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 133, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "Eric", + "repo": "a11yproject", + "date": "3/2" + }, + "intent": "How many commits did Eric make to a11yproject on 3/2?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 134, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "kilian", + "repo": "a11yproject", + "date": "3/1/2023" + }, + "intent": "How many commits did kilian make to a11yproject on 3/1/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 135, + "require_login": 
true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "Eric and Kilian", + "repo": "a11yproject", + "date": "1/3/2023" + }, + "intent": "How many commits did Eric and Kilian make to a11yproject on 1/3/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 136, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "Steven Woodson", + "repo": "a11y-webring.club", + "date": "2/6/2023" + }, + "intent": "How many commits did Steven Woodson make to a11y-webring.club on 2/6/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "5" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "map" + ], + "task_id": 137, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?", + "instantiation_dict": { + "city1": "the city where the Liberty Bell is located", + "city2": "the home city of Pirates" + }, + "intent": "What is the estimated driving time between the city where the Liberty Bell is located and the home city of Pirates?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + 
"reference_answers": { + "fuzzy_match": [ + "5h 47min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5h 47min" + }, + "intent_template_id": 51 + }, + { + "sites": [ + "map" + ], + "task_id": 138, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?", + "instantiation_dict": { + "city1": "the big apple", + "city2": "the city with the most authentic Philly cheesesteaks" + }, + "intent": "What is the estimated driving time between the big apple and the city with the most authentic Philly cheesesteaks?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "1h 58min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1h 58min" + }, + "intent_template_id": 51 + }, + { + "sites": [ + "map" + ], + "task_id": 139, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?", + "instantiation_dict": { + "city1": "the hometown of Joe Biden", + "city2": "Bridgeport" + }, + "intent": "What is the estimated driving time between the hometown of Joe Biden and Bridgeport?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3h 20min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3h 20min" + }, + "intent_template_id": 51 + }, + { + "sites": [ + "map" + ], + "task_id": 140, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?", + "instantiation_dict": { + 
"city1": "the city of Niagara Falls", + "city2": "the city of Yale University" + }, + "intent": "What is the estimated driving time between the city of Niagara Falls and the city of Yale University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "8h 33min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "8h 33min" + }, + "intent_template_id": 51 + }, + { + "sites": [ + "shopping" + ], + "task_id": 141, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "food-related", + "time": "March 2023" + }, + "intent": "How much I spent on food-related shopping during March 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "47.41" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$47.41" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 142, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "hair care and hair style", + "time": "Jan 2023" + }, + "intent": "How much I spent on hair care and hair style shopping during Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "95.23" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$95.23" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 143, 
+ "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "home decoration", + "time": "1/29/2023" + }, + "intent": "How much I spent on home decoration shopping during 1/29/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "265.69" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$265.69" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 144, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "food", + "time": "from mid Jan to the end Jan 2023" + }, + "intent": "How much I spent on food shopping during from mid Jan to the end Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 145, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "cooking and food", + "time": "March 2022" + }, + "intent": "How much I spent on cooking and food shopping during March 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "52.35" + ] + }, + "reference_url": "", + 
"program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$52.35" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 146, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "size", + "product": "picture frame", + "time": "Sep 2022" + }, + "intent": "What is the size configuration of the picture frame I bought Sep 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16x24" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "16x24" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "shopping" + ], + "task_id": 147, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "size", + "product": "picture frame", + "time": "2022" + }, + "intent": "What is the size configuration of the picture frame I bought 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16x24" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "16x24" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "shopping" + ], + "task_id": 148, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "color", + "product": "picture frame", + "time": 
"Sep 2022" + }, + "intent": "What is the color configuration of the picture frame I bought Sep 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Mist" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Mist" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "shopping" + ], + "task_id": 149, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "color", + "product": "artifical plants", + "time": "Feb 2023" + }, + "intent": "What is the color configuration of the artifical plants I bought Feb 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Green-vines" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Green-vines" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "shopping" + ], + "task_id": 150, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "price", + "product": "fake tree", + "time": "Jan 2023" + }, + "intent": "What is the price configuration of the fake tree I bought Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "260.69" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "260.69" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "map" + ], + "task_id": 151, + "require_login": true, + 
"storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "CMU", + "location2": "University of Pittsburgh" + }, + "intent": "What is the minimum travel time by car from CMU to University of Pittsburgh?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "4min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4min" + }, + "intent_template_id": 36 + }, + { + "sites": [ + "map" + ], + "task_id": 152, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "Schenley park", + "location2": "Upitt" + }, + "intent": "What is the minimum travel time by car from Schenley park to Upitt?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "4min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4min" + }, + "intent_template_id": 36 + }, + { + "sites": [ + "map" + ], + "task_id": 153, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "REI", + "location2": "CMU" + }, + "intent": "What is the minimum travel time by car from REI to CMU?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "7min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "7min" + }, + 
"intent_template_id": 36 + }, + { + "sites": [ + "map" + ], + "task_id": 154, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "CMU gates building", + "location2": "Schenley park" + }, + "intent": "What is the minimum travel time by car from CMU gates building to Schenley park?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "4min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4min" + }, + "intent_template_id": 36 + }, + { + "sites": [ + "map" + ], + "task_id": 155, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "Animal Rescue League of Pittsburgh", + "location2": "Schenley park" + }, + "intent": "What is the minimum travel time by car from Animal Rescue League of Pittsburgh to Schenley park?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "9min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "9min" + }, + "intent_template_id": 36 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 156, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Checkout merge requests assigned to me", + "instantiation_dict": {}, + "intent": "Checkout merge requests assigned to me", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": 
"__GITLAB__/dashboard/merge_requests?assignee_username=byteblaze", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 290 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 157, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show all customers", + "instantiation_dict": {}, + "intent": "Show all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/customer/index/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 255 + }, + { + "sites": [ + "shopping" + ], + "task_id": 158, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 11 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 11 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/heiying-game-card-case-for-nintendo-switch-switch-oled-game-card-or-micro-sd-memory-cards-portable-switch-game-memory-card-storage-with-24-game-card-slots-and-24-micro-sd-card-slots-black.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 159, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 31 + }, + "intent": "I have a lot of 
Nintendo Switch game cards now, help me find the best storage option to fit all 31 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/game-card-holder-storage-case-for-nintendo-switch-games-or-ps-vita-game-case-or-sd-memory-cards-black.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 160, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 6 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 6 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/heiying-game-card-case-for-nintendo-switch-switch-oled-game-card-or-micro-sd-memory-cards-portable-switch-game-memory-card-storage-with-24-game-card-slots-and-24-micro-sd-card-slots-black.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 161, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 23 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 23 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": 
"__SHOPPING__/heiying-game-card-case-for-nintendo-switch-switch-oled-game-card-or-micro-sd-memory-cards-portable-switch-game-memory-card-storage-with-24-game-card-slots-and-24-micro-sd-card-slots-black.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 162, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 40 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 40 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/game-card-holder-storage-case-for-nintendo-switch-games-or-ps-vita-game-case-or-sd-memory-cards-black.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 163, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/ostent-16gb-memory-card-stick-storage-for-sony-ps-vita-psv1000-2000-pch-z081-z161-z321-z641.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? 
Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "I ordered the 16gb but I only got 14 gigs even though I formatted the card", + "The memory card is kind of slow on games and downloads", + "No original packaging It's used and the previous owners data has not been erased", + "The product is a legit sony hardware that have been owned by someone else before", + "The media could not be loaded", + "I could not format the card so I wasn\u2019t able to use it for my VITA" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "I ordered the 16gb but I only got 14 gigs even though I formatted the card. The memory card is kind of slow on games and downloads. No original packaging It's used and the previous owners data has not been erased. The product is a legit sony hardware that have been owned by someone else before The media could not be loaded. I could not format the card so I wasn\u2019t able to use it for my VITA" + }, + "intent_template_id": 136 + }, + { + "sites": [ + "shopping" + ], + "task_id": 164, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/mineralogie-all-natural-lip-gloss-ruby-rose.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? 
Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Dry", + "Uneven color" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "DryUneven color" + }, + "intent_template_id": 136 + }, + { + "sites": [ + "shopping" + ], + "task_id": 165, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sandgrens-swedish-handmade-wooden-clog-sandal-copenhagen.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The 39 was too small. I am afraid the 40 will be too big", + "I was very sad when the shoe rubbed up against my baby toe", + "I had to return them because I knew in time it would tear up my feet", + "The problem is that the strap is made of some really stiff leather and is painful to my heel", + "The front is also uncomfortably tight", + "The Dansko's were similar (not as bad) and loosened up over time" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "The 39 was too small. I am afraid the 40 will be too big. I was very sad when the shoe rubbed up against my baby toe. I had to return them because I knew in time it would tear up my feet. The problem is that the strap is made of some really stiff leather and is painful to my heel. The front is also uncomfortably tight. The Dansko's were similar (not as bad) and loosened up over time." 
+ }, + "intent_template_id": 136 + }, + { + "sites": [ + "shopping" + ], + "task_id": 166, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sensodyne-repair-protect-whitening-toothpaste-with-fluoride-3-4-oz-pack-of-3.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "there is no existing criticism", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 136 + }, + { + "sites": [ + "shopping" + ], + "task_id": 167, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/photosmart-plus-b209-clr-inkjetfb-p-s-c-usb-wrls-1.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The wireless connection works on a whim (about 40% of the time I've owned it)", + "It seems to constantly run out of ink", + "Cartridge prices are less than some printers I've had", + "This printer seems to have more reasons NOT to work (none that are findable or correctable) Ex: error boxes saying that it's out of paper when it automatically switches to photo printing for some reason", + "Scanner is as slow as my first scanner I ever owned in the mid-90's", + "For the $176 I paid, there isn't even a fax component on it. 
I guess the \"PLUS\" part of it's name is in reference to the migraines it causes when you can't figure out the new reason why it's not working for the 10th time in the past 2 months." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "The wireless connection works on a whim (about 40% of the time I've owned it). It seems to constantly run out of ink. Cartridge prices are less than some printers I've had, but now I understand why. This printer seems to have more reasons NOT to work (none that are findable or correctable) Ex: error boxes saying that it's out of paper when it automatically switches to photo printing for some reason. Scanner is as slow as my first scanner I ever owned in the mid-90's. For the $176 I paid, there isn't even a fax component on it. I guess the \"PLUS\" part of it's name is in reference to the migraines it causes when you can't figure out the new reason why it's not working for the 10th time in the past 2 months." 
+ }, + "intent_template_id": 136 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 168, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "more than 100" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got more than 100 stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "No repo found", + "reference_answer_raw_annotation": "No repo found" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 169, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "the most" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got the most stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "a11yproject.com", + "design" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "a11yproject.com, Primer/design" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 170, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "the 
least" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got the least stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "cloud-to-butt", + "dotfiles", + "timeit", + "solarized-prism-theme", + "gimmiethat.space", + "remove-board-movement-events-from-the-github-issue-timeline" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "cloud-to-butt, dotfiles, timeit, solarized-prism-theme, gimmiethat.space, remove-board-movement-events-from-the-github-issue-timeline" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 171, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "less than 5" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got less than 5 stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "a11y-syntax-highlighting", + "a11y-webring.club", + "accessible-html-content-patterns", + "ericwbailey.website", + "cloud-to-butt", + "dotfiles", + "timeit", + "solarized-prism-theme", + "gimmiethat.space", + "remove-board-movement-events-from-the-github-issue-timeline" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "a11y-syntax-highlighting, a11y-webring.club, accessible-html-content-patterns, ericwbailey.website, cloud-to-butt, dotfiles, timeit, solarized-prism-theme, gimmiethat.space, remove-board-movement-events-from-the-github-issue-timeline" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + 
"task_id": 172, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "no" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got no stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "cloud-to-butt", + "dotfiles", + "timeit", + "solarized-prism-theme", + "gimmiethat.space", + "remove-board-movement-events-from-the-github-issue-timeline" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "cloud-to-butt, dotfiles, timeit, solarized-prism-theme, gimmiethat.space, remove-board-movement-events-from-the-github-issue-timeline" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 173, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "better" + }, + "intent": "Open my latest updated issue that has keyword \"better\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "fuzzy_match": ["No, it is open"] + }, + "reference_url": "__GITLAB__/byteblaze/empathy-prompts/-/issues/8", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "", + "url_note": "GOLD in PRED" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 174, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": 
"__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "feature" + }, + "intent": "Open my latest updated issue that has keyword \"feature\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "fuzzy_match": ["No, it is open"] + }, + "reference_url": "__GITLAB__/byteblaze/a11y-webring.club/-/issues/71", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 175, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "dependency" + }, + "intent": "Open my latest updated issue that has keyword \"dependency\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "fuzzy_match": ["No, it is open"] + }, + "reference_url": "__GITLAB__/byteblaze/empathy-prompts/-/issues/18", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 176, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "theme editor" + }, + "intent": "Open my latest updated issue that has keyword \"theme editor\" in its title to check if it is closed", + 
"require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "fuzzy_match": ["No, it is open"] + }, + "reference_url": "__GITLAB__/byteblaze/a11y-syntax-highlighting/-/issues/1", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 177, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "homepage content" + }, + "intent": "Open my latest updated issue that has keyword \"homepage content\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "fuzzy_match": ["Yes, it is closed"] + }, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues/719", + "program_html": [], + "reference_answer_raw_annotation": "closed", + "string_note": "" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 178, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "better" + }, + "intent": "Open my latest created issue that has better in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues/566", + "program_html": [], + "reference_answer_raw_annotation": "Closed", + "string_note": "" + }, + "intent_template_id": 500 + }, 
+ { + "sites": [ + "gitlab" + ], + "task_id": 179, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "feature" + }, + "intent": "Open my latest created issue that has feature in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues/1517", + "program_html": [], + "reference_answer_raw_annotation": "Closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 180, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "dependency" + }, + "intent": "Open my latest created issue that has dependency in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "No" + }, + "reference_url": "__GITLAB__/byteblaze/empathy-prompts/-/issues/18", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 181, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "theme editor" + }, + "intent": "Open my latest created issue that has theme editor in 
its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "No" + }, + "reference_url": "__GITLAB__/byteblaze/a11y-syntax-highlighting/-/issues/1", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 182, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "homepage content" + }, + "intent": "Open my latest created issue that has homepage content in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues/719", + "program_html": [], + "reference_answer_raw_annotation": "closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 183, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "SKU", + "N": "10" + }, + "intent": "Give me the SKU of the products that have 10 units left", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no product that has 10 quantities left.", + "reference_answer_raw_annotation": "There is no product that has 10 quantities left." 
+ }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 184, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "name", + "N": "0" + }, + "intent": "Give me the name of the products that have 0 units left", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Sinbad Fitness Tank" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sinbad Fitness Tank" + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 185, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "brand", + "N": "3" + }, + "intent": "Give me the brand of the products that have 3 units left", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Eos", + "Minerva" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Eos, Minerva" + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 186, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "product names and the sizes", + "N": "2-3" + }, + "intent": "Give me the product names and the sizes of the products that have 2-3 units left", + 
"require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Eos V-Neck Hoodie: S", + "Minera Luma Tech V-Tee: XS" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Eos V-Neck Hoodie: S Minera Luma Tech V-Tee: XS" + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 187, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "SKU", + "N": "1-3" + }, + "intent": "Give me the SKU of the products that have 1-3 units left", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "WH11-S-Blue", + "WS08-XS-Blue" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "WH11-S-Blue, WS08-XS-Blue" + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping" + ], + "task_id": 188, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "cancelled" + }, + "intent": "Tell me the total cost of my latest cancelled order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "365.42" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "365.42" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping" + ], + "task_id": 189, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": 
null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "pending" + }, + "intent": "Tell me the total cost of my latest pending order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "754.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "754.99" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping" + ], + "task_id": 190, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "complete" + }, + "intent": "Tell me the total cost of my latest complete order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "65.32" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "65.32" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping" + ], + "task_id": 191, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "processing" + }, + "intent": "Tell me the total cost of my latest processing order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no order of \"processing\" status", + "reference_answer_raw_annotation": "There is no order of \"processing\" status" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping" + ], + "task_id": 192, + "require_login": true, + 
"storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "non-cancelled" + }, + "intent": "Tell me the total cost of my latest non-cancelled order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "754.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "754.99" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 193, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the total payment amount of the last {{N}} {{status}} orders", + "instantiation_dict": { + "status": "completed", + "N": "2" + }, + "intent": "Get the total payment amount of the last 2 completed orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "182.4" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "182.4" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 194, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the total payment amount of the last {{N}} {{status}} orders", + "instantiation_dict": { + "status": "completed", + "N": "5" + }, + "intent": "Get the total payment amount of the last 5 completed orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "555.2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + 
"reference_answer_raw_annotation": "555.2" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 195, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the total payment amount of the last {{N}} {{status}} orders", + "instantiation_dict": { + "status": "pending", + "N": "5" + }, + "intent": "Get the total payment amount of the last 5 pending orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "885.4" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "885.4" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 196, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Compare the payment difference of the last {{N}} {{status_1}} orders and {{status_2}} orders", + "instantiation_dict": { + "status_1": "cancelled", + "status_2": "completed", + "N": "4" + }, + "intent": "Compare the payment difference of the last 4 cancelled orders and completed orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "194.25" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "194.25" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 197, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the total payment amount of the last {{N}} {{status}} orders", + "instantiation_dict": { + "status": "non-cancelled", + "N": "5" + }, + "intent": "Get the total payment 
amount of the last 5 non-cancelled orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "778.2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "annotation_note": "219.4+210+166.4+93.4+89", + "reference_answer_raw_annotation": "778.2" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 198, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "customer name", + "status": "most recent cancelled" + }, + "intent": "Get the customer name of the most recent cancelled order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Lily Potter" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lily Potter" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 199, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "order ID", + "status": "newest pending" + }, + "intent": "Get the order ID of the newest pending order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "299" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "299" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 200, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", 
+ "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "billing name", + "status": "oldest complete" + }, + "intent": "Get the billing name of the oldest complete order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "John Lee" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "John Lee" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 201, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "customer name", + "status": "earliest fraud suspect" + }, + "intent": "Get the customer name of the earliest fraud suspect order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no order of \"fraud suspect\" status", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 202, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "date", + "status": "most recent canlled" + }, + "intent": "Get the date of the most recent canlled order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "May 23 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "May 23, 2023" + }, + 
"intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 203, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "purchase date and order id", + "status": "most recent pending" + }, + "intent": "Get the purchase date and order id of the most recent pending order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "order id: 000000299", + "purchase date: May 31, 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "000000299, May 31, 2023, 2:55:09 AM" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 204, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "product name and discounted price (low to high)", + "status": "most recent completed" + }, + "intent": "Get the product name and discounted price (low to high) of the most recent completed order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Rapha Sports Short: $35", + "Thorpe Track Pant: $54.4", + "Mach Street Sweatshirt: $62" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Rapha Sports Short: $35 Thorpe Track Pant: $54.4 Mach Street Sweatshirt: $62" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 205, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + 
"geolocation": null, + "intent_template": "How many commits did {{user}} make on {{date}}?", + "instantiation_dict": { + "user": "kilian", + "date": "3/5/2023" + }, + "intent": "How many commits did kilian make on 3/5/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 320 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 206, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make on {{date}}?", + "instantiation_dict": { + "user": "Eric", + "date": "3/2" + }, + "intent": "How many commits did Eric make on 3/2?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 320 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 207, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make on {{date}} in total?", + "instantiation_dict": { + "user": "Eric and Kilian", + "date": "1/3/2023" + }, + "intent": "How many commits did Eric and Kilian make on 1/3/2023 in total?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 320 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 208, + "require_login": true, 
+ "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "+1 2058812302" + }, + "intent": "Find the customer name and email with phone number +1 2058812302", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "John Smith", + "john.smith.xyz@gmail.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "John Smith, john.smith.xyz@gmail.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 209, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "2137418080" + }, + "intent": "Find the customer name and email with phone number 2137418080", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Jennifer White", + "jennifer.white@yahoo.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Jennifer White, jennifer.white@yahoo.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 210, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "2065555555" + }, + "intent": "Find the customer name and email with phone number 2065555555", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + 
"reference_answers": { + "must_include": [ + "Adam Garcia", + "gamingpro456@gmail.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Adam Garcia, gamingpro456@gmail.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 211, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "8015551212" + }, + "intent": "Find the customer name and email with phone number 8015551212", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Sean Miller", + "sean.miller@gmail.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sean Miller, sean.miller@gmail.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 212, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "555-229-3326" + }, + "intent": "Find the customer name and email with phone number 555-229-3326", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Veronica Costello", + "roni_cost@example.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Veronica Costello, roni_cost@example.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 213, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": 
"__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Antonia Racer Tank" + }, + "intent": "What are the key aspects that the customers don't like about Antonia Racer Tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Not suitable for high-impact workouts" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Not suitable for high-impact workouts" + }, + "intent_template_id": 249 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 214, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Zing Jump Rope" + }, + "intent": "What are the key aspects that the customers don't like about Zing Jump Rope", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "It is hard to find the right size. Won't last long" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "It is hard to find the right size. 
Won't last long" + }, + "intent_template_id": 249 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 215, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Circe ice fleece" + }, + "intent": "What are the key aspects that the customers don't like about Circe ice fleece", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Material quality, fit, insufficient warmth, color" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Material quality, fit, insufficient warmth, color" + }, + "intent_template_id": 249 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 216, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Electra Bra Top" + }, + "intent": "What are the key aspects that the customers don't like about Electra Bra Top", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Not true to size" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Not true to size" + }, + "intent_template_id": 249 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 217, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Pursuit Tone 
Band" + }, + "intent": "What are the key aspects that the customers don't like about Pursuit Tone Band", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Insufficient resistance for their workouts." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Insufficient resistance for their workouts." + }, + "intent_template_id": 249 + }, + { + "sites": [ + "map" + ], + "task_id": 218, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the walking distance from nearby hotels to {{location}} that take at most {{n}} minutes?", + "instantiation_dict": { + "location": "CMU, Pittsburgh", + "n": "5" + }, + "intent": "Show me the walking distance from nearby hotels to CMU, Pittsburgh that take at most 5 minutes?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no hotel near CMU that is within 5 minutes walking distance", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 41 + }, + { + "sites": [ + "map" + ], + "task_id": 219, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the walking distance from nearby hotels to {{location}} that take at most {{n}} minutes?", + "instantiation_dict": { + "location": "Pittsburgh airport", + "n": "3" + }, + "intent": "Show me the walking distance from nearby hotels to Pittsburgh airport that take at most 3 minutes?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no hotel near Pittsburgh airport that is within 3 minutes walking distance", +
"reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 41 + }, + { + "sites": [ + "map" + ], + "task_id": 220, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the walking distance from nearby hotels to {{location}} that take at most {{n}} minutes?", + "instantiation_dict": { + "location": "Gardner Steel Conference Center,", + "n": 5 + }, + "intent": "Show me the walking distance from nearby hotels to Gardner Steel Conference Center, that take at most 5 minutes?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Wyndham Pittsburgh University Cente: 375m", + "The Oaklander Hotel: 338m" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Wyndham Pittsburgh University Cente: 375 m\nThe Oaklander Hotel: 338 m" + }, + "intent_template_id": 41 + }, + { + "sites": [ + "map" + ], + "task_id": 221, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I am at CMU Pittsburgh, how long it takes to the nearest {{location}} with different transportation methods?", + "instantiation_dict": { + "location": "USPS postal office" + }, + "intent": "I am at CMU Pittsburgh, how long it takes to the nearest USPS postal office with different transportation methods?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Walk: 1 minute", + "Drive: less than 1 minute", + "Bike: less than 1 minute" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Walk: 1 minute to walk and\nDrive: less than 1 minute\nBike: less than 1 minute" + }, + "intent_template_id": 35 + }, + { + "sites": [ + "map" + ], + "task_id": 222, + "require_login": true, + "storage_state": null, + 
"start_url": "__MAP__", + "geolocation": null, + "intent_template": "I am at CMU Pittsburgh, how long it takes to drive to the nearest {{location}}", + "instantiation_dict": { + "location": "cold stone ice cream" + }, + "intent": "I am at CMU Pittsburgh, how long it takes to drive to the nearest cold stone ice cream", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3min" + }, + "intent_template_id": 35 + }, + { + "sites": [ + "map" + ], + "task_id": 223, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I am at CMU Pittsburgh, how long it takes to drive to the nearest {{location}}", + "instantiation_dict": { + "location": "Mcdonald's" + }, + "intent": "I am at CMU Pittsburgh, how long it takes to drive to the nearest Mcdonald's", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "4min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4min" + }, + "intent_template_id": 35 + }, + { + "sites": [ + "map" + ], + "task_id": 224, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I am at CMU Pittsburgh, how long it takes to drive to the nearest {{location}}", + "instantiation_dict": { + "location": "wendys" + }, + "intent": "I am at CMU Pittsburgh, how long it takes to drive to the nearest wendys", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3min" + }, + "intent_template_id": 35 + }, + { + "sites": [ + 
"shopping" + ], + "task_id": 225, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What do customers say about {{product_type}} from {{manufature}}", + "instantiation_dict": { + "product_type": "brush", + "manufature": "sephora" + }, + "intent": "What do customers say about brush from sephora", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The sephora brushes don't have reviews", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 135 + }, + { + "sites": [ + "shopping" + ], + "task_id": 226, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "Amazon basic" + }, + "intent": "What is the price range for products from Amazon basic?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "5.49", + "375.19" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$5.49 - $375.19" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 227, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "EYZUTAK" + }, + "intent": "What is the price range for products from EYZUTAK?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "9.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + 
"reference_answer_raw_annotation": "$9.99" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 228, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "sephora" + }, + "intent": "What is the price range for products from sephora?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "18.18", + "94.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$18.18 - $94.99" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 229, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "ugreen" + }, + "intent": "What is the price range for products from ugreen?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "6.99", + "38.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$6.99 - $38.99" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 230, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "Perricone MD" + }, + "intent": "What is the price range for products from Perricone MD?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "35", + "149" + ] + }, + 
"reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$35 - $149" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 231, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "cancelled" + }, + "intent": "Get the order number of my most recent cancelled order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "170" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "000000170" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "shopping" + ], + "task_id": 232, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "pending" + }, + "intent": "Get the order number of my most recent pending order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "189" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "000000189" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "shopping" + ], + "task_id": 233, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "complete" + }, + "intent": "Get the order number of my most recent complete order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": 
{ + "must_include": [ + "180" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "000000180" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "shopping" + ], + "task_id": 234, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "on hold" + }, + "intent": "Get the order number of my most recent on hold order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "there is no on hold order", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "shopping" + ], + "task_id": 235, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "under delivery" + }, + "intent": "Get the order number of my most recent under delivery order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no under delivery order", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "map" + ], + "task_id": 236, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Where is the nearest {{location}} from {{location2}} {{condition}}", + "instantiation_dict": { + "location": "pharmacy", + "location2": "Carnegie Mellon", + "condition": "I can walk within 20mins" + }, + "intent": "Where is the nearest 
pharmacy from Carnegie Mellon I can walk within 20mins", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Schiller's Pharmacy", + "811", + "South Aiken Avenue", + "Shadyside", + "Pittsburgh" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Schiller's Pharmacy, 811, South Aiken Avenue, Shadyside, Pittsburgh, Allegheny County, 15232, United States" + }, + "intent_template_id": 39 + }, + { + "sites": [ + "map" + ], + "task_id": 237, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Where is the nearest {{location}} from {{location2}} {{condition}}", + "instantiation_dict": { + "location": "gas station", + "location2": "CMU", + "condition": "" + }, + "intent": "Where is the nearest gas station from CMU ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Sunoco", + "North Craig Street", + "North Oakland", + "Pittsburgh" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sunoco, North Craig Street, North Oakland, Pittsburgh, Allegheny County, 15213, United States" + }, + "intent_template_id": 39 + }, + { + "sites": [ + "shopping" + ], + "task_id": 238, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "PS4 accessories" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from PS4 accessories category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + 
"reference_url": "__SHOPPING__/astro-gaming-a50-wireless-headset-base-station-gen-4-compatible-with-ps5-ps4-pc-mac-black-silver.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping" + ], + "task_id": 239, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "nutrition bars and drinks" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from nutrition bars and drinks category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/kellogg-s-special-k-protein-meal-bars-chocolate-caramel-12-7oz-6-count.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping" + ], + "task_id": 240, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "competative swimwear" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from competative swimwear category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/women-cross-flower-beachwear-tankini-bandeau-bandage-bikini-set-push-up-swimwear-bathing-suit-two-pieces-swimsuits.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping" + ], + "task_id": 
241, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "skin care tool" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from skin care tool category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/professional-medi-spa-scar-stretch-mark-reduction-system.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping" + ], + "task_id": 242, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "Household Supplies" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from Household Supplies category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/lynx-battery-12v-200ah-lithium-iron-phosphate-lifepo4-prismatic-deep-cell-battery-set-of-4-3-2v-cells-with-3-bus-bars-and-8-lug-nuts-for-rv-solar-marine-off-grid-applications.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 243, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with 
{{product}}", + "instantiation_dict": { + "information": "email address", + "product": "Circe fleece" + }, + "intent": "Show me the email address of the customer who is the most unhappy with Circe fleece", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "hannah.lim@gmail.com" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "hannah.lim@gmail.com" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 244, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "email address", + "product": "Olivia zip jacket" + }, + "intent": "Show me the email address of the customer who is the most unhappy with Olivia zip jacket", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "emma.lopez@gmail.com" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "emma.lopez@gmail.com" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 245, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "name", + "product": "Antonia racer tank" + }, + "intent": "Show me the name of the customer who is the most unhappy with Antonia racer tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Shaunte" + }, + "reference_url": "", + 
"program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Shaunte" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 246, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "name", + "product": "Chloe tank" + }, + "intent": "Show me the name of the customer who is the most unhappy with Chloe tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Teofila" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Teofila" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 247, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "email address", + "product": "the style of Zoe products" + }, + "intent": "Show me the email address of the customer who is the most unhappy with the style of Zoe products", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "Valorie doesn't have a email in the system", + "program_html": [], + "string_note": "There is no negative review for Zoe products", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "map" + ], + "task_id": 248, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD 
format", + "instantiation_dict": { + "location": "Carnegie Mellon Caf\u00e9" + }, + "intent": "Tell me the coordinates of Carnegie Mellon Caf\u00e9 in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.442", + "-79.939" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.4424191, -79.9397388" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 249, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "Western Pennsylvania Hospital Heliport" + }, + "intent": "Tell me the coordinates of Western Pennsylvania Hospital Heliport in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.460", + "-79.946" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.46076, -79.94666" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 250, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "Apple Store near Pitt" + }, + "intent": "Tell me the coordinates of Apple Store near Pitt in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.451", + "-79.933" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.4511693, -79.9334241" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 251, + "require_login": true, + "storage_state": null, + 
"start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "bus stop on the Carnegie art museum side of the street near CMU" + }, + "intent": "Tell me the coordinates of bus stop on the Carnegie art museum side of the street near CMU in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.444", + "-79.948" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.4443, -79.94889" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 252, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "Tokyo Japanese Food Store in Pittsburgh" + }, + "intent": "Tell me the coordinates of Tokyo Japanese Food Store in Pittsburgh in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.457", + "-79.929" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.45761, -79.92934" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 253, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "Carnegie Mellon Caf\u00e9", + "information": "phone number" + }, + "intent": "What is the phone number of Carnegie Mellon Caf\u00e9", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no such 
information in the map", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "map" + ], + "task_id": 254, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "Western Pennsylvania Hospital", + "information": "phone number" + }, + "intent": "What is the phone number of Western Pennsylvania Hospital", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "4125785000" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4125785000" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "map" + ], + "task_id": 255, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Who is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "PIT airport", + "information": "operator" + }, + "intent": "Who is the operator of PIT airport", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Allegheny County Airport Authority" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Allegheny County Airport Authority" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "map" + ], + "task_id": 256, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "Carnegie art museum in pittsburgh", + "information": "website" + }, + "intent": "What is the website of Carnegie art museum in pittsburgh", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + 
"exact_match": "http://web.cmoa.org/" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "http://web.cmoa.org/" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "map" + ], + "task_id": 257, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "Tokyo Japanese Food Store in Pittsburgh", + "information": "hours of operation" + }, + "intent": "What is the hours of operation of Tokyo Japanese Food Store in Pittsburgh", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "We-Su 10:00-17:00" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "We-Su 10:00-17:00" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 258, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "See all public projects", + "instantiation_dict": {}, + "intent": "See all public projects", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/explore", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 325 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 259, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Get me my RSS feed token", + "instantiation_dict": {}, + "intent": "Get me my RSS feed token", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "TMN_bBn9Z48qVbUFZV45" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + 
"reference_answer_raw_annotation": "TMN_bBn9Z48qVbUFZV45" + }, + "intent_template_id": 312 + }, + { + "sites": [ + "shopping" + ], + "task_id": 260, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Video Game" + }, + "intent": "I want to browse the products in the Video Game category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/video-games.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "shopping" + ], + "task_id": 261, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Headphones" + }, + "intent": "I want to browse the products in the Headphones category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/electronics/headphones.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "shopping" + ], + "task_id": 262, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Men shoes" + }, + "intent": "I want to browse the products in the Men shoes category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/clothing-shoes-jewelry/men/shoes.html", + "program_html": [], + "url_note": "GOLD 
in PRED" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "shopping" + ], + "task_id": 263, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Woman clothing" + }, + "intent": "I want to browse the products in the Woman clothing category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/clothing-shoes-jewelry/women/clothing.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "shopping" + ], + "task_id": 264, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Cabinets, Racks & Shelves" + }, + "intent": "I want to browse the products in the Cabinets, Racks & Shelves category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/office-products/office-furniture-lighting/cabinets-racks-shelves.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 265, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What's the closest national park to {{city}}? How far is it to drive there?", + "instantiation_dict": { + "city": "Boston" + }, + "intent": "What's the closest national park to Boston? 
How far is it to drive there?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Acadia National Park", + "457km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Acadia National Park\n457km" + }, + "intent_template_id": 85 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 266, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What's the closest national park to {{city}}?", + "instantiation_dict": { + "city": "the largest city in Maine" + }, + "intent": "What's the closest national park to the largest city in Maine?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Acadia National Park" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Acadia National Park" + }, + "intent_template_id": 85 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 267, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What's the closest national park to {{city}}? How long it takes to drive there?", + "instantiation_dict": { + "city": "the hometown of Stephen King" + }, + "intent": "What's the closest national park to the hometown of Stephen King? 
How long it takes to drive there?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Acadia National Park" + ], + "fuzzy_match": [ + "1h 23min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Acadia National Park\n1h 23min" + }, + "intent_template_id": 85 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 268, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What's the closest national park to {{city}}? How long does it take to bike there?", + "instantiation_dict": { + "city": "Vinalhaven, ME" + }, + "intent": "What's the closest national park to Vinalhaven, ME? How long does it take to bike there?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Acadia National Park" + ], + "fuzzy_match": [ + "10h 33min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Acadia National Park\n10h 33min" + }, + "intent_template_id": 85 + }, + { + "sites": [ + "shopping" + ], + "task_id": 269, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "25", + "product_category": "women shoes" + }, + "intent": "Show me products under $25 in \"women shoes\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/clothing-shoes-jewelry/women/shoes.html?price=0-25", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 270, + "require_login": true, + 
"storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "30", + "product_category": "men shoes" + }, + "intent": "Show me products under $30 in \"men shoes\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/clothing-shoes-jewelry/men/shoes.html?price=0-30", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 271, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "46.99", + "product_category": "makeup remover" + }, + "intent": "Show me products under $46.99 in \"makeup remover\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/beauty-personal-care/makeup/makeup-remover.html?price=0-46.99", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 272, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "78", + "product_category": "children dental care" + }, + "intent": "Show me products under $78 in \"children dental care\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": 
"__SHOPPING__/beauty-personal-care/oral-care/children-s-dental-care.html?price=0-78", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 273, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "199", + "product_category": "furtiture with accent" + }, + "intent": "Show me products under $199 in \"furtiture with accent\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/home-kitchen/furniture/accent-furniture.html?price=0-199", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 274, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "usb wifi" + }, + "intent": "Search for \"usb wifi\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/?q=usb+wifi", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 275, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "xbox" + }, + "intent": "Search for \"xbox\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/?q=xbox", + 
"program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 276, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "switch accessories" + }, + "intent": "Search for \"switch accessories\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/?q=switch+accessories", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 277, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "batteries for iphone 13" + }, + "intent": "Search for \"batteries for iphone 13\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/?q=iphone+13", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 278, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "green tea bag for weight loss" + }, + "intent": "Search for \"green tea bag for weight loss\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/?q=green+tea+bag+for+weight+loss", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 212 + }, + { + "sites": [ + 
"shopping" + ], + "task_id": 279, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Provide me with the complete names of Bluetooth headphones from Sony, and also share the price range for the available models", + "instantiation_dict": {}, + "intent": "Provide me with the complete names of Bluetooth headphones from Sony, and also share the price range for the available models", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "SONY WH1000XM3 Bluetooth Wireless Noise Canceling Headphones Silver WH-1000XM3/S (Renewed)", + "Sony WH-CH710N/H Wireless Bluetooth Noise Cancelling Headphones", + "Sony WH-1000XM3B Wireless Bluetooth Noise-Canceling Over-Ear Headphones (Black) Basic Headphone Bundle Kit with Stylus", + "Sony Wireless Headphones WH-CH510: Wireless Bluetooth On-Ear Headset with Mic for Phone-Call, Black", + "Sony WHCH710N Wireless Bluetooth Noise Canceling Over-The-Ear Headphones (Black) with Kratos 18W PD Two-Port Power Adapter and Kratos 6-Feet Nylon Braided USB-C Cable Bundle (3 Items)", + "Sony WI-SP500 Wireless in-Ear Sports Headphones, White (WISP500/W)", + "Sony WI-SP510 Extra BASS Wireless in-Ear Headset/Headphones with mic for Phone Call Sports IPX5 Bluetooth, Black (WISP510/B)", + "Sony MDRAS600BT Active Sports Bluetooth Headset (Black)", + "Sony WH-1000XM4 Wireless Noise Canceling Over-Ear Headphones (Black) with Sony WLA-NS7 Wireless TV Adapter Bundle (2 Items)", + "Sony WI-C300 Wireless In-Ear Headphones, Red (WIC300/R)", + "Sony XB950N1 Extra Bass Wireless Noise Canceling Headphones, Black", + "SONY - H900N Hi-Res Noise Cancelling Wireless Headphone Grayish Black Renewed", + "18.99", + "406" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These models are avaiable: SONY WH1000XM3 Bluetooth Wireless Noise 
Canceling Headphones Silver WH-1000XM3/S (Renewed) Sony WH-CH710N/H Wireless Bluetooth Noise Cancelling Headphones Sony WH-1000XM3B Wireless Bluetooth Noise-Canceling Over-Ear Headphones (Black) Basic Headphone Bundle Kit with Stylus Sony Wireless Headphones WH-CH510: Wireless Bluetooth On-Ear Headset with Mic for Phone-Call, Black Sony WHCH710N Wireless Bluetooth Noise Canceling Over-The-Ear Headphones (Black) with Kratos 18W PD Two-Port Power Adapter and Kratos 6-Feet Nylon Braided USB-C Cable Bundle (3 Items) Sony WI-SP500 Wireless in-Ear Sports Headphones, White (WISP500/W) Sony WI-SP510 Extra BASS Wireless in-Ear Headset/Headphones with mic for Phone Call Sports IPX5 Bluetooth, Black (WISP510/B) Sony MDRAS600BT Active Sports Bluetooth Headset (Black) Sony WH-1000XM4 Wireless Noise Canceling Over-Ear Headphones (Black) with Sony WLA-NS7 Wireless TV Adapter Bundle (2 Items) Sony WI-C300 Wireless In-Ear Headphones, Red (WIC300/R) Sony XB950N1 Extra Bass Wireless Noise Canceling Headphones, Black SONY - H900N Hi-Res Noise Cancelling Wireless Headphone Grayish Black Renewed The price ranges from $18.99 to $406 " + }, + "intent_template_id": 204 + }, + { + "sites": [ + "shopping" + ], + "task_id": 280, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Provide me with the full names of chargers from Anker, and also share the price range for the available models", + "instantiation_dict": {}, + "intent": "Provide me with the full names of chargers from Anker, and also share the price range for the available models", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Anker USB C Charger 30W, 711 Charger, Compact Fast Charger (Not Foldable) for MacBook Air/iPhone 13/13 Mini/13 Pro/13 Pro Max/12, Galaxy S21, Note 20, iPad Pro, Pixel, and More", + "Anker USB C Charger 40W, 521 Charger (Nano Pro), PIQ 
3.0 Durable Compact Fast Charger (Not Foldable) for iPhone 13/13 Mini/13 Pro/13 Pro Max/12, Galaxy, Pixel 4/3, iPad/iPad Mini (Cable Not Included)", + "Anker PowerCore Speed 20000, 20000mAh Qualcomm Quick Charge 3.0 & PowerIQ Portable Charger, with Quick Charge Recharging, Power Bank for Samsung, iPhone, iPad and More, Black (A1278)", + "5Ft Micro-USB Charger Cord Cable Fit for Anker-PowerCore 5000 10000 20100 13000 26800 Mini 3350 Fusion II 15000 Redux 20000 Slim 10000 Astro E1 AC Replacement Power Adapter Supply", + "Anker 10W Max Wireless Charger, 313 Wireless Charger (Pad), Qi-Certified Wireless Charging 7.5W for iPhone 12/12 Pro/12 mini/12 Pro Max, 10W for Galaxy S10 S9 S8, S9 Plus, Note 9 (No AC Adapter)", + "Anker Wireless Charger, 313 Wireless Charger (Stand), Qi-Certified for iPhone 12, 12 Pro Max, SE, 11, 11 Pro, 11 Pro Max, XR, XS Max, 10W Fast-Charging Galaxy S20, S10 (No AC Adapter)", + "USB Charger, Anker Elite Dual Port 24W Wall Charger, PowerPort 2 with PowerIQ and Foldable Plug, for iPhone 11/Xs/XS Max/XR/X/8/7/6/Plus, iPad Pro/Air 2/Mini 3/Mini 4, Samsung S4/S5, and More", + "iPhone 12 Charger [GaN Tech], Anker 30W Compact USB-C Wall Charger with Power Delivery, PowerPort Atom for iPhone 12 / Mini/Pro/Pro Max / 11 / X/XS/XR, iPad Pro, MacBook 12'', Pixel, Galaxy", + "USB C Charger, Anker 30W 2 Port Fast Charger with 18W USB C Power Adapter, Foldable PowerPort PD 2 Charger for iPad Pro, iPhone 11/11 Pro / 11 Pro Max/XS/Max/XR/X, Pixel, Galaxy, and More", + "Anker 40W 5-Port USB Wall Charger, PowerPort 5 for iPhone XS / XS Max / XR / X / 8 / 7 / 6 / Plus, iPad Pro / Air 2 / mini, Galaxy S9 / S8 / Edge / Plus, Note 8 / 7, LG, Nexus, HTC and More, Black (AK-A2124111)", + "Anker Quick Charge 3.0 39W Dual USB Wall Charger, PowerPort Speed 2 for Galaxy S10/S9/S8/Edge/Plus, Note 8/7 and PowerIQ for iPhone Xs/XS Max/XR/X/8/Plus, iPad Pro/Air 2/Mini, LG, Nexus, HTC and More", + "USB C Charger, Anker 20W PIQ 3.0 Fast Charger with Foldable Plug, PowerPort III 
Charger for iPhone 13/13 Mini/13 Pro/13 Pro Max/12/11, iPad/iPad Mini, MagSafe, and More (Cable Not Included)", + "8.99", + "59.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These models are availiable: Anker USB C Charger 30W, 711 Charger, Compact Fast Charger (Not Foldable) for MacBook Air/iPhone 13/13 Mini/13 Pro/13 Pro Max/12, Galaxy S21, Note 20, iPad Pro, Pixel, and More Anker USB C Charger 40W, 521 Charger (Nano Pro), PIQ 3.0 Durable Compact Fast Charger (Not Foldable) for iPhone 13/13 Mini/13 Pro/13 Pro Max/12, Galaxy, Pixel 4/3, iPad/iPad Mini (Cable Not Included) Anker PowerCore Speed 20000, 20000mAh Qualcomm Quick Charge 3.0 & PowerIQ Portable Charger, with Quick Charge Recharging, Power Bank for Samsung, iPhone, iPad and More, Black (A1278) 5Ft Micro-USB Charger Cord Cable Fit for Anker-PowerCore 5000 10000 20100 13000 26800 Mini 3350 Fusion II 15000 Redux 20000 Slim 10000 Astro E1 AC Replacement Power Adapter Supply Anker 10W Max Wireless Charger, 313 Wireless Charger (Pad), Qi-Certified Wireless Charging 7.5W for iPhone 12/12 Pro/12 mini/12 Pro Max, 10W for Galaxy S10 S9 S8, S9 Plus, Note 9 (No AC Adapter) Anker Wireless Charger, 313 Wireless Charger (Stand), Qi-Certified for iPhone 12, 12 Pro Max, SE, 11, 11 Pro, 11 Pro Max, XR, XS Max, 10W Fast-Charging Galaxy S20, S10 (No AC Adapter) USB Charger, Anker Elite Dual Port 24W Wall Charger, PowerPort 2 with PowerIQ and Foldable Plug, for iPhone 11/Xs/XS Max/XR/X/8/7/6/Plus, iPad Pro/Air 2/Mini 3/Mini 4, Samsung S4/S5, and More iPhone 12 Charger [GaN Tech], Anker 30W Compact USB-C Wall Charger with Power Delivery, PowerPort Atom for iPhone 12 / Mini/Pro/Pro Max / 11 / X/XS/XR, iPad Pro, MacBook 12'', Pixel, Galaxy USB C Charger, Anker 30W 2 Port Fast Charger with 18W USB C Power Adapter, Foldable PowerPort PD 2 Charger for iPad Pro, iPhone 11/11 Pro / 11 Pro Max/XS/Max/XR/X, Pixel, Galaxy, and More Anker 40W 5-Port USB Wall Charger, 
PowerPort 5 for iPhone XS / XS Max / XR / X / 8 / 7 / 6 / Plus, iPad Pro / Air 2 / mini, Galaxy S9 / S8 / Edge / Plus, Note 8 / 7, LG, Nexus, HTC and More, Black (AK-A2124111) Anker Quick Charge 3.0 39W Dual USB Wall Charger, PowerPort Speed 2 for Galaxy S10/S9/S8/Edge/Plus, Note 8/7 and PowerIQ for iPhone Xs/XS Max/XR/X/8/Plus, iPad Pro/Air 2/Mini, LG, Nexus, HTC and More USB C Charger, Anker 20W PIQ 3.0 Fast Charger with Foldable Plug, PowerPort III Charger for iPhone 13/13 Mini/13 Pro/13 Pro Max/12/11, iPad/iPad Mini, MagSafe, and More (Cable Not Included) Magnetic Wireless Charger, Anker Wireless Charger with 5ft Built-in USB-C Cable, PowerWave Magnetic Pad, 7.5W Charging for iPhone 13 / 13 Pro / 13 Pro Max / 13 mini / 12 / 12 Pro (No AC Adapter) USB C Super Fast Charger, Anker 25W PD Wall Charger Fast Charging for Samsung Galaxy S21/S21+/S21 Ultra/S20/Z Flip/Note20/20 Ultra/Note10/10+/S9/S8/S10e, iPad Pro 12.9, and More (Cable not Included) The price ranges from $8.99 to $59.99" + }, + "intent_template_id": 204 + }, + { + "sites": [ + "shopping" + ], + "task_id": 281, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Please provide me with the complete product names of Oral B brush heads designed for children, along with their corresponding price range per brush", + "instantiation_dict": {}, + "intent": "Please provide me with the complete product names of Oral B brush heads designed for children, along with their corresponding price range per brush", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Oral-B Kids Extra Soft Replacement Brush Heads featuring STAR WARS, 2 count", + "Kids By Oral-b Stages Power Star Wars Replacement Heads 4 Pack", + "3.745", + "6.495" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These 
models are availiable: Oral-B Kids Extra Soft Replacement Brush Heads featuring STAR WARS, 2 count Kids By Oral-b Stages Power Star Wars Replacement Heads 4 Pack The price ranges from $3.745 to $6.495 " + }, + "intent_template_id": 204 + }, + { + "sites": [ + "shopping" + ], + "task_id": 282, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List the full product names of slide slippers from Nike and tell me the price range of the available products", + "instantiation_dict": {}, + "intent": "List the full product names of slide slippers from Nike and tell me the price range of the available products", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Nike Men's Air Max Camden Slide Sandal", + "Nike Men's Benassi JDI Fanny Pack Slides", + "Nike Victori One Mens Comfort Slide Cn9675-003 (Midnight Navy/Midnight Navy/White, Numeric_10)", + "Nike Offcourt Slide Mens Bq4639-002 Size 12", + "Nike Jordan Men's Break Slide Red AR6374-602", + "Nike Victori One Slide Mens Style : Dd9559-300", + "Nike Men's Benassi Solarsoft Slide Athletic Sandal (Black/White, numeric_14)", + "Nike Men's Benassi Solarsoft Slide Athletic Sandal (Midnight Navy/Blue, numeric_8)", + "Nike womens Benassi Just Do It", + "27.6", + "90.65" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These models are availiable: Nike Men's Air Max Camden Slide Sandal Nike Men's Benassi JDI Fanny Pack Slides Nike Victori One Mens Comfort Slide Cn9675-003 (Midnight Navy/Midnight Navy/White, Numeric_10) Nike Offcourt Slide Mens Bq4639-002 Size 12 Nike Jordan Men's Break Slide Red AR6374-602 Nike Victori One Slide Mens Style : Dd9559-300 Nike Men's Benassi Solarsoft Slide Athletic Sandal (Black/White, numeric_14) Nike Men's Benassi Solarsoft Slide Athletic Sandal (Midnight 
Navy/Blue, numeric_8) Nike womens Benassi Just Do It The price ranges from $27.6 to $90.65" + }, + "intent_template_id": 204 + }, + { + "sites": [ + "shopping" + ], + "task_id": 283, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Look up the most recent models of XBox controllers released between 2020-2021?", + "instantiation_dict": {}, + "intent": "Look up the most recent models of XBox controllers released between 2020-2021?", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/microsoft-xbox-controller-carbon-black-for-series-x-series-s-xbox-one-windows-10-android-ios-bundled-with-dual-port-charging-dock-xbox-controller-skin-voucher-premgear-cloth.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 210 + }, + { + "sites": [ + "shopping" + ], + "task_id": 284, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the least expensive {{product}} with a minimum storage capacity of {{min_storage}}.", + "instantiation_dict": { + "product": "shoe storage", + "min_storage": "12 pairs" + }, + "intent": "Show the least expensive shoe storage with a minimum storage capacity of 12 pairs.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/onlyeasy-over-the-door-shoe-storage-organizer-hanging-shoe-rack-holder-with-24-large-fabric-pockets-22-1-x-61-4-herringbone-grey-mxrodsb1p.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 207 + }, + { + "sites": [ + "shopping" + ], + "task_id": 285, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": 
"Show the least expensive {{product}} with a minimum storage capacity of {{min_storage}}.", + "instantiation_dict": { + "product": "switch card holder", + "min_storage": "15 cards" + }, + "intent": "Show the least expensive switch card holder with a minimum storage capacity of 15 cards.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/game-card-holder-storage-case-for-nintendo-switch-games-or-ps-vita-game-case-or-sd-memory-cards-black.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 207 + }, + { + "sites": [ + "shopping" + ], + "task_id": 286, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the least expensive {{product}} with a minimum storage capacity of {{min_storage}}.", + "instantiation_dict": { + "product": "ssd hard drive", + "min_storage": "1TB" + }, + "intent": "Show the least expensive ssd hard drive with a minimum storage capacity of 1TB.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/external-hard-drive-2tb-ultra-thin-external-hard-drive-2000gb-ultra-high-speed-portable-3-1-type-c-storage-drive-compatible-with-pc-laptop-and-mac-2tb-a1.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 207 + }, + { + "sites": [ + "map" + ], + "task_id": 287, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "How much time does it take from Pittsburgh to Philadelphia by car?", + "instantiation_dict": {}, + "intent": "How much time does it take from Pittsburgh to Philadelphia by car?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "5h 47min" + ] + }, + "reference_url": 
"", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5h 47min" + }, + "intent_template_id": 47 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 288, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "name" + }, + "intent": "Tell me the name of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Samantha Jones" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Samantha Jones" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 289, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "email address, name, phone number" + }, + "intent": "Tell me the email address, name, phone number of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "email: coolcat321@hotmail.com", + "name: Samantha Jones", + "phone number: 3055551212" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "email: coolcat321@hotmail.com name: Samantha Jones phone number: 3055551212" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 290, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + 
"start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "product SKUs in the most recent cancelled orders" + }, + "intent": "Tell me the product SKUs in the most recent cancelled orders of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "WSH09-29-White", + "WSH09-28-Green", + "MSH11-34-Blue", + "WP09-29-Purple" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "WSH09-29-White,WSH09-28-Green,MSH11-34-Blue,WP09-29-Purple" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 291, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "total spend on products in the most recent cancelled orders" + }, + "intent": "Tell me the total spend on products in the most recent cancelled orders of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "148" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$148" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 292, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + 
"instantiation_dict": { + "attribute": "total number of cancellations" + }, + "intent": "Tell me the total number of cancellations of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "9" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "9" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 293, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "Super_Awesome_Robot" + }, + "intent": "Show me the command to clone Super_Awesome_Robot with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/convexegg/super_awesome_robot.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/convexegg/super_awesome_robot.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 294, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "ChatGPT" + }, + "intent": "Show me the command to clone ChatGPT with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/convexegg/chatgpt.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "git clone 
ssh://git@metis.lti.cs.cmu.edu:2222/convexegg/chatgpt.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 295, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "metaseq" + }, + "intent": "Show me the command to clone metaseq with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/root/metaseq.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/root/metaseq.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 296, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "the best GAN python implementation" + }, + "intent": "Show me the command to clone the best GAN python implementation with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "ssh://git@metis.lti.cs.cmu.edu:2222/eriklindernoren/PyTorch-GAN.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "ssh://git@metis.lti.cs.cmu.edu:2222/eriklindernoren/PyTorch-GAN.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 297, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "the most stared Covid location tracker" 
+ }, + "intent": "Show me the command to clone the most stared Covid location tracker with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "ssh://git@metis.lti.cs.cmu.edu:2222/yjlou/2019-nCov.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "ssh://git@metis.lti.cs.cmu.edu:2222/yjlou/2019-nCov.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "shopping" + ], + "task_id": 298, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "completed" + }, + "intent": "Show the most recent completed order", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/sales/order/view/order_id/180/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "shopping" + ], + "task_id": 299, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "cancelled" + }, + "intent": "Show the most recent cancelled order", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/sales/order/view/order_id/170/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "shopping" + ], + "task_id": 300, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "pending" + }, + 
"intent": "Show the most recent pending order", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/sales/order/view/order_id/189/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "shopping" + ], + "task_id": 301, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "processing" + }, + "intent": "Show the most recent processing order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": {"fuzzy_match": "N/A"}, + "reference_url": "", + "program_html": [], + "string_note": "there is no order in processing" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "shopping" + ], + "task_id": 302, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "out of delivery" + }, + "intent": "Show the most recent out of delivery order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": {"fuzzy_match": "N/A"}, + "reference_url": "", + "program_html": [], + "string_note": "there is no order in processing" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 303, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Kilian", + "period": "durning 2023" + }, + "intent": "How many commits did Kilian make durning 2023?", + "require_reset": false, + "eval": { + 
"eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 304, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Eric", + "period": "between Feb 2023 and May 2023" + }, + "intent": "How many commits did Eric make between Feb 2023 and May 2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "14" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "14" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 305, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Philip", + "period": "in 2023/1" + }, + "intent": "How many commits did Philip make in 2023/1?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 306, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Anthony", + "period": 
"between 08/2022-09/2022" + }, + "intent": "How many commits did Anthony make between 08/2022-09/2022?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 307, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Nic", + "period": "in April 2021" + }, + "intent": "How many commits did Nic make in April 2021?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "16" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 308, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "primer/design" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the primer/design project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Shawn Allen" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Shawn Allen" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 309, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": 
"__GITLAB__", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "thoughtbot/administrate" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the thoughtbot/administrate project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Grayson Wright" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Grayson Wright" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 310, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "AndroidSlidingUpPanel" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the AndroidSlidingUpPanel project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "tokudu" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "tokudu" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 311, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "Pytorch GAN" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the Pytorch GAN project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + 
"reference_answers": { + "exact_match": "Erik Linder-Nor\u00e9n" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Erik Linder-Nor\u00e9n" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 312, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "csvkit" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the csvkit project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Christopher Groskopf" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Christopher Groskopf" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "shopping" + ], + "task_id": 313, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Which number to call for the customer service?", + "instantiation_dict": {}, + "intent": "Which number to call for the customer service?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no phone number in the website", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 134 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 314, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + 
"repo": "prime/design", + "attribute": "name" + }, + "intent": "List the name of the top 3 contributors to prime/design repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Shawn Allen", + "Inayaili Le\u00f3n", + "Aurora Pleguezuelo" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Shawn Allen, Inayaili Le\u00f3n, Aurora Pleguezuelo" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 315, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "Pytorch GAN", + "attribute": "email address" + }, + "intent": "List the email address of the top 3 contributors to Pytorch GAN repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "eriklindernoren@live.se", + "eriklindernoren@gmail.com", + "pinnacle.chen@qq.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "eriklindernoren@live.se, eriklindernoren@gmail.com, pinnacle.chen@qq.com" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 316, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "facebook's guide on building react apps", + "attribute": "name" + }, + "intent": "List the name of the top 3 contributors to facebook's guide on building react apps repo, 
ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Ian Sutherland", + "Joe Hadda", + "Dan Abramov" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Ian Sutherland, Joe Hadda, Dan Abramov" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 317, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "metaseq", + "attribute": "name and number of commits" + }, + "intent": "List the name and number of commits of the top 3 contributors to metaseq repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Susan Zhang: 70", + "Stephen Roller: 51", + "Peter Albert: 12" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Susan Zhang: 70, Stephen Roller: 51, Peter Albert: 12" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 318, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "2019-nCov", + "attribute": "last names" + }, + "intent": "List the last names of the top 3 contributors to 2019-nCov repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lo", + "Chen", + "Chu" + ] + }, + "reference_url": "", + 
"program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lo, Chen, Chu" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "shopping" + ], + "task_id": 319, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}}, including shipping fee", + "instantiation_dict": { + "time": "April 2022" + }, + "intent": "How much refund I should expect from my order canlled in April 2022, including shipping fee", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 320, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}}, including shipping fee", + "instantiation_dict": { + "time": "Feb 2023" + }, + "intent": "How much refund I should expect from my order canlled in Feb 2023, including shipping fee", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "406.53" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "406.53" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 321, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}}, including shipping fee", + "instantiation_dict": { + "time": "2022" + }, + "intent": "How much refund I should 
expect from my order canlled in 2022, including shipping fee", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "3053.97" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3053.97" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 322, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}} if I cannot get the shipping fee refunded?", + "instantiation_dict": { + "time": "May 2023" + }, + "intent": "How much refund I should expect from my order canlled in May 2023 if I cannot get the shipping fee refunded?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "350.42" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "350.42" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 323, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}}? I only kept the AC-DC Adapter and the shop told me that I cannot get the shipping fee back", + "instantiation_dict": { + "time": "2022/03" + }, + "intent": "How much refund I should expect from my order canlled in 2022/03? 
I only kept the AC-DC Adapter and the shop told me that I cannot get the shipping fee back", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "264.49" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "264.49" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 324, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "chairs", + "sorting_order": "ascending price" + }, + "intent": "Show me the \"chairs\" listings by ascending price.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/index/?product_list_order=price&q=chairs&product_list_dir=asc", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 325, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "mouth night guard", + "sorting_order": "descending price" + }, + "intent": "Show me the \"mouth night guard\" listings by descending price.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/index/?q=mouth%20night%20guard%20&product_list_order=price", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 326, + "require_login": true, + "storage_state": 
"./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "Canon photo printer", + "sorting_order": "search relevance, from most to least" + }, + "intent": "Show me the \"Canon photo printer\" listings by search relevance, from most to least.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/?q=Canon+photo+printer", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 327, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "iphone 12 phone case", + "sorting_order": "name alphabetically" + }, + "intent": "Show me the \"iphone 12 phone case\" listings by name alphabetically.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/index/?q=%20iphone%2012%20phone%20case&product_list_order=name", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 328, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "iphone 12 phone case", + "sorting_order": "price" + }, + "intent": "Show me the \"iphone 12 phone case\" listings by price.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": 
"__SHOPPING__/catalogsearch/result/index/?product_list_order=price&q=%20iphone%2012%20phone%20case", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 329, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", + "instantiation_dict": { + "time": "on 4/19/2023" + }, + "intent": "How much I spend on 4/19/2023 on shopping at One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 330, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", + "instantiation_dict": { + "time": "in March 2023" + }, + "intent": "How much I spend in March 2023 on shopping at One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "81.31" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "81.31" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 331, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", + "instantiation_dict": { + "time": "in July 2022" + }, + "intent": "How much I spend in July 2022 on shopping at One Stop Market?", + "require_reset": false, + "eval": { + 
"eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.16" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.16" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 332, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", + "instantiation_dict": { + "time": "each month from Jan to the end of March 2023" + }, + "intent": "How much I spend each month from Jan to the end of March 2023 on shopping at One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Jan: 572.8", + "Feb: 762.18", + "Mar: 83.31" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Jan: 572.8\nFeb: 762.18\nMar: 83.31" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 333, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much did I spend on shopping at One Stop Market {{time}}? They gave me a 20% discount on the total amount for orders exceeding $200 in cash", + "instantiation_dict": { + "time": "on November 2022" + }, + "intent": "How much did I spend on shopping at One Stop Market on November 2022? 
They gave me a 20% discount on the total amount for orders exceeding $200 in cash", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "359.546" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "359.546" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 334, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "muffin cornbread mix" + }, + "intent": "Tell me when I last ordered my muffin cornbread mix?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "March 11th 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "March 11th 2023" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "shopping" + ], + "task_id": 335, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "body butter" + }, + "intent": "Tell me when I last ordered my body butter?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January 16th 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January 16th 2023" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "shopping" + ], + "task_id": 336, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me when I last 
ordered my {{description}}?", + "instantiation_dict": { + "description": "conditioner" + }, + "intent": "Tell me when I last ordered my conditioner?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January 16th 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January 16th 2023" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "shopping" + ], + "task_id": 337, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "bread olive" + }, + "intent": "Tell me when I last ordered my bread olive?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "December 12th 2022" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "December 12th 2022" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "shopping" + ], + "task_id": 338, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "toothpaste" + }, + "intent": "Tell me when I last ordered my toothpaste?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "December 4th 2022" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "December 4th 2022" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 339, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": 
"__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "that report bugs" + }, + "intent": "List all opened issues that report bugs", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues/?label_name%5B%5D=bug", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 340, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "that report bugs" + }, + "intent": "List all opened issues that report bugs", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/issues/?label_name%5B%5D=type%3A%20bug%20%F0%9F%90%9E", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 341, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/root/metaseq", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "requesting new features" + }, + "intent": "List all opened issues requesting new features", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/root/metaseq/-/issues/?label_name%5B%5D=enhancement", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 342, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + 
"start_url": "__GITLAB__/root/metaseq", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "that ask about OPT model related questions" + }, + "intent": "List all opened issues that ask about OPT model related questions", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/root/metaseq/-/issues/?search=OPT&label_name%5B%5D=question", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 343, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/root/metaseq", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "that don't have any labels" + }, + "intent": "List all opened issues that don't have any labels", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/root/metaseq/-/issues/?label_name%5B%5D=None", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 344, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "by far" + }, + "intent": "How many reviews our shop received by far?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "351" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "351" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 345, + "require_login": true, 
+ "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "in Apr 2023" + }, + "intent": "How many reviews our shop received in Apr 2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "351" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "351" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 346, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "during 2022" + }, + "intent": "How many reviews our shop received during 2022?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 347, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "from the beginning of the shop" + }, + "intent": "How many reviews our shop received from the beginning of the shop?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "351" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "351" + }, + "intent_template_id": 248 + }, + { + "sites": [ + 
"shopping_admin" + ], + "task_id": 348, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "in May 2023" + }, + "intent": "How many reviews our shop received in May 2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 349, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Who else have access to my repo {{repo}}, show me their usernames", + "instantiation_dict": { + "repo": "gimmiethat.space" + }, + "intent": "Who else have access to my repo gimmiethat.space, show me their usernames", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "yjlou" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "yjlou" + }, + "intent_template_id": 298 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 350, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Who else have access to my repo {{repo}}, show me their usernames", + "instantiation_dict": { + "repo": "prism-theme" + }, + "intent": "Who else have access to my repo prism-theme, show me their usernames", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "abisubramanya27" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + 
"reference_answer_raw_annotation": "Abishek S, abisubramanya27" + }, + "intent_template_id": 298 + }, + { + "sites": [ + "shopping" + ], + "task_id": 351, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "PS4 accessories", + "order": "ascending" + }, + "intent": "List products from PS4 accessories category by ascending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/video-games/playstation-4/accessories.html?product_list_order=price", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "shopping" + ], + "task_id": 352, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "nutrition bars and drinks", + "order": "ascending" + }, + "intent": "List products from nutrition bars and drinks category by ascending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/health-household/diet-sports-nutrition/nutrition-bars-drinks.html?product_list_order=price", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "shopping" + ], + "task_id": 353, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "competative swimwear", + "order": "ascending" 
+ }, + "intent": "List products from competative swimwear category by ascending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/clothing-shoes-jewelry/sport-specific-clothing/competitive-swimwear.html?product_list_order=price", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "shopping" + ], + "task_id": 354, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "living room furtniture", + "order": "descending" + }, + "intent": "List products from living room furtniture category by descending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/home-kitchen/furniture/living-room-furniture.html?product_list_order=price&product_list_dir=desc", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "shopping" + ], + "task_id": 355, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "kids' bedding", + "order": "descending" + }, + "intent": "List products from kids' bedding category by descending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/home-kitchen/bedding/kids-bedding.html?product_list_dir=desc", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "map" + ], + "task_id": 356, + 
"require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show the route from SCS CMU in Pittsburgh to the location where the Declaration of Independence and Constitution were signed", + "instantiation_dict": {}, + "intent": "Show the route from SCS CMU in Pittsburgh to the location where the Declaration of Independence and Constitution were signed", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Gates and Hillman Centers", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Independence Hall", + "Philadelphia" + ] + } + } + ] + }, + "intent_template_id": 49 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 357, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Checkout merge requests requiring my review", + "instantiation_dict": {}, + "intent": "Checkout merge requests requiring my review", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/dashboard/merge_requests?reviewer_username=byteblaze", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 291 + }, + { + "sites": [ + "shopping" + ], + "task_id": 358, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the {{info}} for order 
number {{order_number}}.", + "instantiation_dict": { + "info": "shipping method", + "order_number": 187 + }, + "intent": "Show me the shipping method for order number 187.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Flat Rate - Fixed" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Flat Rate - Fixed" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "shopping" + ], + "task_id": 359, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "order date", + "order_number": "148" + }, + "intent": "Show me the order date for order number 148.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "1/29/2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1/29/2023" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "shopping" + ], + "task_id": 360, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "product names", + "order_number": "148" + }, + "intent": "Show me the product names for order number 148.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Bornbridge Artificial Spiral Topiary Tree - Indoor / Outdoor Topiary Trees - Artificial Outdoor Plants (2 Pack, 4' Cypress)", + "Russound 5B45W 4\" Indoor Outdoor Speakers White" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + 
"reference_answer_raw_annotation": "Bornbridge Artificial Spiral Topiary Tree - Indoor / Outdoor Topiary Trees - Artificial Outdoor Plants (2 Pack, 4' Cypress), Russound 5B45W 4\" Indoor Outdoor Speakers White" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "shopping" + ], + "task_id": 361, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "order statuses", + "order_number": "170 and 189" + }, + "intent": "Show me the order statuses for order number 170 and 189.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "170: cancelled", + "189: pending" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "170: cancelled, 189: pending" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "shopping" + ], + "task_id": 362, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "billing address", + "order_number": "00178" + }, + "intent": "Show me the billing address for order number 00178.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "101 S San Mateo Dr", + "San Mateo", + "California", + "94010", + "United States" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Emma Lopez, 101 S San Mateo Dr, San Mateo, California, 94010, United States" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "map" + ], + "task_id": 363, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + 
"geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "Carnegie Mellon University", + "location/address_2": "Carnegie Music Hall" + }, + "intent": "Measure distance between Carnegie Mellon University and Carnegie Music Hall by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "748m" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "748m" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "map" + ], + "task_id": 364, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "Carnegie Mellon University", + "location/address_2": "UPMC Shadyside" + }, + "intent": "Measure distance between Carnegie Mellon University and UPMC Shadyside by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "1.7km" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1.7km" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "map" + ], + "task_id": 365, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "Carnegie Music Hall", + "location/address_2": "UPMC Shadyside" + }, + "intent": "Measure distance between Carnegie Music Hall and UPMC Shadyside by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "2.2km" + 
}, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2.2km" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "map" + ], + "task_id": 366, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "CVS (closet one)", + "location/address_2": "UPMC Shadyside" + }, + "intent": "Measure distance between CVS (closet one) and UPMC Shadyside by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "1.2km" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1.2km" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "map" + ], + "task_id": 367, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "Carnegie Mellon University", + "location/address_2": "CVS (closet one)" + }, + "intent": "Measure distance between Carnegie Mellon University and CVS (closet one) by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "1.4km" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1.4km" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "shopping" + ], + "task_id": 368, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "find discounted items.", + "instantiation_dict": {}, + "intent": "find discounted items.", + "require_reset": false, + "eval": { + 
"eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no function to show only discount items", + "reference_answer_raw_annotation": "There is no function to show only discount items." + }, + "intent_template_id": 188 + }, + { + "sites": [ + "map" + ], + "task_id": 369, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "Carnegie Music Hall" + }, + "intent": "Pull up the description page of Carnegie Music Hall on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Carnegie Music Hall" + ] + } + } + ] + }, + "intent_template_id": 52 + }, + { + "sites": [ + "map" + ], + "task_id": 370, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "Carnegie Mellon University" + }, + "intent": "Pull up the description page of Carnegie Mellon University on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Carnegie Mellon University" + ] + } + } + ] + }, + "intent_template_id": 52 + }, + { + "sites": [ + "map" + ], + "task_id": 371, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Pull up 
the description page of {{location}} on Map", + "instantiation_dict": { + "location": "Piada restaurant near Pitt" + }, + "intent": "Pull up the description page of Piada restaurant near Pitt on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Piada Italian Street Food", + "Forbes Avenue" + ] + } + } + ] + }, + "intent_template_id": 52 + }, + { + "sites": [ + "map" + ], + "task_id": 372, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "the Costco in Pittsburhg near a river" + }, + "intent": "Pull up the description page of the Costco in Pittsburhg near a river on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Costco", + "Waterfront Drive West" + ] + } + } + ] + }, + "intent_template_id": 52 + }, + { + "sites": [ + "map" + ], + "task_id": 373, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "Whole Foods near Carnegie Mellon" + }, + "intent": "Pull up the description page of Whole Foods near Carnegie Mellon on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": 
"document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Whole Foods", + "East Liberty" + ] + } + } + ] + }, + "intent_template_id": 52 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 374, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Preview the {{name}} theme for my shop", + "instantiation_dict": { + "name": "Magento Blank" + }, + "intent": "Preview the Magento Blank theme for my shop", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/admin/system_design_theme/edit/id/1", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 266 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 375, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Preview the {{name}} theme for my shop", + "instantiation_dict": { + "name": "Magento Luma" + }, + "intent": "Preview the Magento Luma theme for my shop", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/admin/system_design_theme/edit/id/3/key/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 266 + }, + { + "sites": [ + "shopping" + ], + "task_id": 376, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Summarize customer reviews for {{product}}.", + "instantiation_dict": { + "product": "Amazon Echo Dot 3rd generation" + }, + "intent": "Summarize customer reviews for Amazon Echo Dot 3rd generation.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + 
"reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no review for this product", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 182 + }, + { + "sites": [ + "map" + ], + "task_id": 377, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "CMU ArtPark Lab", + "space": "resturants" + }, + "intent": "Find the resturants around CMU ArtPark Lab", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__MAP__/search?query=restaurants%20near%20CMU%20ArtPark%20Lab", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 378, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "CMU main campus", + "space": "parking" + }, + "intent": "Find the parking around CMU main campus", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__MAP__/search?query=parking%20near%20carnegie%20mellon%20university", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 379, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "CMU main campus", + "space": "hotel" + }, + "intent": "Find the hotel around CMU main campus", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": 
"__MAP__/search?query=hotels%20near%20carnegie%20mellon%20university", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 380, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "Carnegie Music Hall", + "space": "bar" + }, + "intent": "Find the bar around Carnegie Music Hall", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__MAP__/search?query=bars%20near%20Carnegie%20Music%20Hall", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 381, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "Carnegie Music Hall", + "space": "hotel" + }, + "intent": "Find the hotel around Carnegie Music Hall", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__MAP__/search?query=hotels%20near%20Carnegie%20Music%20Hall", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 382, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I am arriving at Carnegie Mellon University. Find the nearby US Citizenship and Immigration Services and the walking distance to the nearest Social Security Administration from US Citizenship and Immigration Services", + "instantiation_dict": {}, + "intent": "I am arriving at Carnegie Mellon University. 
Find the nearby US Citizenship and Immigration Services and the walking distance to the nearest Social Security Administration from US Citizenship and Immigration Services", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no USCIS nearby", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 781 + }, + { + "sites": [ + "map" + ], + "task_id": 383, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I am arriving at Pittsburgh Airport. Show me the name of a Hyatt hotel if there is any nearby. Tell me the names of supermarkets that are within 15mins driving from the hotel", + "instantiation_dict": {}, + "intent": "I am arriving at Pittsburgh Airport. Show me the name of a Hyatt hotel if there is any nearby. Tell me the names of supermarkets that are within 15mins driving from the hotel", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hyatt Regency Pittsburgh International Airport", + "Giant Eagle", + "ALDI" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hyatt Regency Pittsburgh International Airport Giant Eagle, ALDI" + }, + "intent_template_id": 782 + }, + { + "sites": [ + "shopping" + ], + "task_id": 384, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List the customer names who complain about the quality of EYZUTAK phone cases", + "instantiation_dict": {}, + "intent": "List the customer names who complain about the quality of EYZUTAK phone cases", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lisa Lee", 
+ "Evelyn Kurver", + "Amanda", + "N Randall" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lisa Lee, Evelyn Kurver, Amanda, N Randall" + }, + "intent_template_id": 666 + }, + { + "sites": [ + "shopping" + ], + "task_id": 385, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List the customer names who thinks EYZUTAK phone cases are of good looking", + "instantiation_dict": {}, + "intent": "List the customer names who thinks EYZUTAK phone cases are of good looking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lisa Lee", + "MH", + "Misba009", + "Amanda", + "N Randall", + "Amazon Customer", + "Cally", + "Bethany Robertson" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lisa Lee, MH, Misba009, Amanda, N Randall, Amazon Customer, Cally, Bethany Robertson" + }, + "intent_template_id": 666 + }, + { + "sites": [ + "shopping" + ], + "task_id": 386, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the rating of {{product}}", + "instantiation_dict": { + "product": "Ugreen lightning to 3.5mm cable" + }, + "intent": "What is the rating of Ugreen lightning to 3.5mm cable. 
Please round to the nearest whole number", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "65 |OR| 3" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Around 3.1 stars" + }, + "intent_template_id": 1355 + }, + { + "sites": [ + "shopping" + ], + "task_id": 387, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Who gave {{stars}} for phone cases from EYZUTAK", + "instantiation_dict": { + "stars": "4 or 5 stars" + }, + "intent": "Who gave 4 or 5 stars for phone cases from EYZUTAK", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "MH", + "Misba009", + "Amanda", + "Amazon Customer", + "Cally", + "Bethany Robertson" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "MH, Misba009, Amanda, Amazon Customer, Cally, Bethany Robertson, Bethany Robertson" + }, + "intent_template_id": 1356 + }, + { + "sites": [ + "shopping" + ], + "task_id": 388, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Who gave {{stars}} for phone cases from EYZUTAK", + "instantiation_dict": { + "stars": "1 or 2 stars" + }, + "intent": "Who gave 1 or 2 stars for phone cases from EYZUTAK", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Evelyn Kurver", + "N Randall" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Evelyn Kurver, N Randall" + }, + "intent_template_id": 1356 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 389, + "require_login": true, + "storage_state": 
"./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "octovisuals page", + "repo": "primer/design", + "content": "Thanks, working on reviews" + }, + "intent": "Post \"Thanks, working on reviews\" for the merge request related to octovisuals page in primer/design project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/primer/design/-/merge_requests/450", + "locator": "", + "required_contents": { + "must_include": [ + "Thanks, working on reviews" + ] + } + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 390, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "semantic HTML post", + "repo": "a11yproject/a11yproject.com", + "content": "lgtm" + }, + "intent": "Post \"lgtm\" for the merge request related to semantic HTML post in a11yproject/a11yproject.com project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests/1531", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "lgtm" + } + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 391, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Post \"{{content}}\" 
for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "focus edge cases", + "repo": "a11yproject/a11yproject.com", + "content": "close because non reproducible" + }, + "intent": "Post \"close because non reproducible\" for the merge request related to focus edge cases in a11yproject/a11yproject.com project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests/1265", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "close because non reproducible" + } + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 392, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "color ulitity", + "repo": "a11yproject.com", + "content": "Good idea" + }, + "intent": "Post \"Good idea\" for the merge request related to color ulitity in a11yproject.com project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests/1071", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "Good idea" + } + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 393, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Post 
\"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "fixing the broken links", + "repo": "byteblaze/empathy-prompts", + "content": "lgtm" + }, + "intent": "Post \"lgtm\" for the merge request related to fixing the broken links in byteblaze/empathy-prompts project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/empathy-prompts/-/merge_requests/19", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "lgtm" + } + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 394, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "2019-nCov" + }, + "intent": "Fork 2019-nCov.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/2019-nCov", + "locator": "", + "required_contents": { + "must_include": [ + "2019-nCov" + ] + } + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 395, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "the Pytorch GAN repo with most stars" + }, + "intent": "Fork the Pytorch GAN repo with most stars.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/PyTorch-GAN", + "locator": "", + "required_contents": { 
+ "must_include": [ + "Pytorch-GAN" + ] + } + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 396, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "ChatGPT" + }, + "intent": "Fork ChatGPT.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/ChatGPT", + "locator": "", + "required_contents": { + "must_include": [ + "ChatGPT" + ] + } + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 397, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "MetaSeq" + }, + "intent": "Fork MetaSeq.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/metaseq", + "locator": "", + "required_contents": { + "must_include": [ + "metaseq" + ] + } + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 398, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "all source repos from Akilesh Kannan" + }, + "intent": "Fork all source repos from Akilesh Kannan", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/SimCache", + "locator": "", + "required_contents": { + "must_include": [ + "SimCache" + ] + } + }, + { + "url": 
"__GITLAB__/byteblaze/dots", + "locator": "", + "required_contents": { + "must_include": [ + "dots" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/CacheEval", + "locator": "", + "required_contents": { + "must_include": [ + "CacheEval" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/nvidia-patch", + "locator": "", + "required_contents": { + "must_include": [ + "404" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/viewgrades-scraper", + "locator": "", + "required_contents": { + "must_include": [ + "404" + ] + } + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "reddit" + ], + "task_id": 399, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "I am a robot" + }, + "intent": "Change my reddit bio to \"I am a robot\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "document.querySelector(\".user-bio__biography\").outerText", + "required_contents": { + "exact_match": "I am a robot" + } + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 400, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "Pro Python Developer with 20 years of Experience" + }, + "intent": "Change my reddit bio to \"Pro Python Developer with 20 years of Experience\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": 
"document.querySelector(\".user-bio__biography\").outerText", + "required_contents": { + "exact_match": "Pro Python Developer with 20 years of Experience" + } + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 401, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "Seeking SDE positions" + }, + "intent": "Change my reddit bio to \"Seeking SDE positions\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "document.querySelector(\".user-bio__biography\").outerText", + "required_contents": { + "exact_match": "Seeking SDE positions" + } + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 402, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "Freelance Web Developer" + }, + "intent": "Change my reddit bio to \"Freelance Web Developer\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "document.querySelector(\".user-bio__biography\").outerText", + "required_contents": { + "exact_match": "Freelance Web Developer" + } + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 403, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": 
{ + "content": "Awesome Prompt Artist" + }, + "intent": "Change my reddit bio to \"Awesome Prompt Artist\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "document.querySelector(\".user-bio__biography\").outerText", + "required_contents": { + "exact_match": "Awesome Prompt Artist" + } + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 404, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "books" + }, + "intent": "Upvote the newest post in books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/books/124260/adults-reading-to-each-other-out-loud", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 405, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "DIY" + }, + "intent": "Upvote the newest post in DIY subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/DIY/119019/how-can-i-bring-an-hdmi-cable-from-my-pc-downstairs-to-my-tv", + "locator": 
"document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 406, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "future technology" + }, + "intent": "Upvote the newest post in future technology subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/Futurology/119517/openai-ceo-it-s-not-funny-that-i-m-afraid-of-the-ai-we-re", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 407, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "deeplearning" + }, + "intent": "Upvote the newest post in deeplearning subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/deeplearning/124993/meta-s-llama-weights-leaked-on-torrent-and-the-best-thing", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + 
"task_id": 408, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "explain like im 5" + }, + "intent": "Upvote the newest post in explain like im 5 subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/explainlikeimfive/39244/eli5-how-does-pinching-a-ribbon-and-sliding-your-finger", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 409, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/books/59421/friendly-reminder-bookshop-org-exists", + "geolocation": null, + "intent_template": "Reply to {{position_description}} in this post with \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the manager of the website", + "content_description": "thanks! I am a big fan of your website." + }, + "intent": "Reply to the manager of the website in this post with \"thanks! I am a big fan of your website.\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/books/59421/-/comment/1235250", + "locator": "", + "required_contents": { + "must_include": [ + "thanks! I am a big fan of your website." 
+ ] + } + } + ] + }, + "intent_template_id": 23 + }, + { + "sites": [ + "reddit" + ], + "task_id": 410, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/singularity/69404/this-is-how-chatgpt-sees-itself", + "geolocation": null, + "intent_template": "Reply to {{position_description}} in this post with \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the first reply", + "content_description": "don't panic" + }, + "intent": "Reply to the first reply in this post with \"don't panic\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/singularity/69404/-/comment/1042264", + "locator": "", + "required_contents": { + "must_include": [ + "don't panic" + ] + } + } + ] + }, + "intent_template_id": 23 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 411, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to MIT license.", + "instantiation_dict": { + "repo": "byteblaze/cloud-to-butt" + }, + "intent": "Make the LICENSE of byteblaze/cloud-to-butt to MIT license.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/cloud-to-butt/-/blob/master/LICENSE.txt", + "locator": "", + "required_contents": { + "must_include": [ + "MIT license" + ] + } + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 412, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to Apache License", + "instantiation_dict": { + "repo": "byteblaze/accessible-html-content-patterns" + }, 
+ "intent": "Make the LICENSE of byteblaze/accessible-html-content-patterns to Apache License", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/accessible-html-content-patterns/-/blob/main/LICENSE", + "locator": "", + "required_contents": { + "must_include": [ + "Apache License", + "http://www.apache.org/licenses/LICENSE-2.0" + ] + } + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 413, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to one that mandates all copies and derivative works to be under the same license", + "instantiation_dict": { + "repo": "byteblaze/a11y-syntax-highlighting" + }, + "intent": "Make the LICENSE of byteblaze/a11y-syntax-highlighting to one that mandates all copies and derivative works to be under the same license", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/a11y-syntax-highlighting/-/blob/main/LICENSE", + "locator": "", + "required_contents": { + "must_include": [ + "GENERAL PUBLIC LICENSE" + ] + } + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 414, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to MIT license.", + "instantiation_dict": { + "repo": "byteblaze/dotfiles" + }, + "intent": "Make the LICENSE of byteblaze/dotfiles to MIT license.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"__GITLAB__/byteblaze/dotfiles/-/blob/main/LICENSE", + "locator": "", + "required_contents": { + "must_include": [ + "MIT license", + "The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software." + ] + } + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 415, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Go to the merge request on {{topic}} I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "instantiation_dict": { + "topic": "verification functions" + }, + "intent": "Go to the merge request on verification functions I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/a11y-webring.club/-/merge_requests/40", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "@davepgreene" + } + } + ] + }, + "intent_template_id": 360 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 416, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Go to the merge request on {{topic}} I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. 
Otherwise remind him with a simple @.", + "instantiation_dict": { + "topic": "wcag" + }, + "intent": "Go to the merge request on wcag I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests/1270", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "Thank you" + } + } + ] + }, + "intent_template_id": 360 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 417, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Go to the merge request on {{topic}} I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "instantiation_dict": { + "topic": "404 link" + }, + "intent": "Go to the merge request on 404 link I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. 
Otherwise remind him with a simple @.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests/1485", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "@Roshanjossey" + } + } + ] + }, + "intent_template_id": 360 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 418, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Busy" + }, + "intent": "Set my gitlab status as Busy.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.cover-status').lastChild.textContent", + "required_contents": { + "exact_match": "Busy" + } + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 419, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Enjoying life" + }, + "intent": "Set my gitlab status as Enjoying life.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.cover-status').lastChild.textContent", + "required_contents": { + "exact_match": "Enjoying life" + } + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 420, + "require_login": true, 
+ "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Playing Badminton" + }, + "intent": "Set my gitlab status as Playing Badminton.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.cover-status').lastChild.textContent", + "required_contents": { + "exact_match": "Playing Badminton" + } + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 421, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Resting due to leg injury" + }, + "intent": "Set my gitlab status as Resting due to leg injury.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.cover-status').lastChild.textContent", + "required_contents": { + "exact_match": "Resting due to leg injury" + } + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 422, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Out of Office" + }, + "intent": "Set my gitlab status as Out of Office.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": 
"document.querySelector('.cover-status').lastChild.textContent", + "required_contents": { + "exact_match": "Out of Office" + } + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 423, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Mark all {{brand}} shirts on sale", + "instantiation_dict": { + "brand": "Hollister" + }, + "intent": "Mark all Hollister shirts on sale", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/126/", + "locator": "document.querySelector('input[name=\"product[sale]\"]').value", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 237 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 424, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the place where Mr. Rogers was filmed" + }, + "intent": "Find the page of the place where Mr. 
Rogers was filmed on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Pittsburgh" + ] + } + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 425, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the longest bridge in the Western hemisphere" + }, + "intent": "Find the page of the longest bridge in the Western hemisphere on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Mackinac Bridge" + ] + } + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 426, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the place in Pennsylvania where a plane crashed during the September 11th attacks" + }, + "intent": "Find the page of the place in Pennsylvania where a plane crashed during the September 11th attacks on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Somerset County" + ] + } + } + ] + }, + 
"intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 427, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the university that has most Turning Award winners" + }, + "intent": "Find the page of the university that has most Turning Award winners on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Massachusetts Institute of Technology" + ] + } + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 428, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the undergrad college of the person who developed the Nash equilibrium" + }, + "intent": "Find the page of the undergrad college of the person who developed the Nash equilibrium on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Carnegie Mellon University" + ] + } + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 429, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the colleges where The Chair was 
filmed in Pittsburgh" + }, + "intent": "Find the page of the colleges where The Chair was filmed in Pittsburgh on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Chatham University" + ] + } + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 430, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the college(s) where The Chair was filmed in Pennsylvania other than the ones in Pittsburgh" + }, + "intent": "Find the page of the college(s) where The Chair was filmed in Pennsylvania other than the ones in Pittsburgh on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Washington & Jefferson College" + ] + } + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "shopping" + ], + "task_id": 431, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/tall-pink-taper-candles-4-piece-orange-colored-tapered-candles-gradient-candles-10-6-inches-tall-tie-dye-candle-set-large-dripless-long-burning-candlesticks-two-color-taper-candles-candlesticks.html |AND| __SHOPPING__/spaas-white-taper-candles-4-pack-10-inch-tall-candles-scent-free-premium-wax-candle-sticks-8-hour-long-burning-white-candlesticks-for-home-decoration-wedding-holiday-and-parties.html |AND| 
__SHOPPING__/white-starfish-wall-candle-sconces-set-of-2-beach-decor-ocean-themed-wall-mount-candleholders-nautical-style-beach-bathroom-decor-coastal-farmhouse-seashell-candle-holders.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "SPAAS White Taper Candles - 4 Pack |OR| 10 Inch Tall Candles, Scent-Free Premium Wax Candle Sticks |OR| 8 Hour Long Burning White Candlesticks for Home Decoration, Wedding, Holiday and Parties" + ] + } + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 432, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/ciclon-energy-drink-regular-24-cans-8-3oz.html |AND| __SHOPPING__/v8-energy-healthy-energy-drink-steady-energy-from-black-and-green-tea-pomegranate-blueberry-8-ounce-can-pack-of-24.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "V8 +Energy, Healthy Energy Drink, Steady Energy from Black and Green Tea, Pomegranate Blueberry, 8 Ounce Can ,Pack of 24" + ] + } + } + ] + }, + "intent_template_id": 145 + 
}, + { + "sites": [ + "shopping" + ], + "task_id": 433, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/tazrigo-5pcs-white-dental-resin-brush-pens-dental-shaping-silicone-tooth-tool.html |AND| __SHOPPING__/stylus-pens-for-touch-screens-2-pcs-universal-stylus-2-in-1-2022-updated-touch-screen-pens-for-all-touch-screens-cell-phones-tablets-laptops-with-6-replacement-tips-4-discstips-2-fiber-tips.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "Tazrigo 5pcs White Dental Resin Brush Pens Dental Shaping Silicone Tooth Tool" + ] + } + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 434, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/3-pairs-ruffle-socks-lace-ankle-socks-for-girls-frilly-socks-women-decorative.html |AND| __SHOPPING__/viviki-women-glitter-socks-ultrathin-transparent-tulle-lace-socks-no-show-ankle-crew-socks-3-pack.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + 
"VIVIKI Women Glitter Socks Ultrathin Transparent Tulle Lace Socks - No Show Ankle Crew Socks 3 Pack" + ] + } + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 435, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/35-ft-hdmi-cable-gearit-pro-series-hdmi-cable-35-feet-high-speed-ethernet-4k-resolution-3d-video-and-arc-audio-return-channel-hdmi-cable-white.html |AND| __SHOPPING__/dp-to-hdmi-cable-6ft-2-pack-fosmon-gold-plated-displayport-to-hdmi-cable-1080p-full-hd-for-pcs-to-hdtv-monitor-projector-with-hdmi-port.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "DP to HDMI Cable 6FT (2 Pack), Fosmon Gold Plated Displayport to HDMI Cable 1080p Full HD for PCs to HDTV, Monitor, Projector with HDMI Port" + ] + } + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 436, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?", + "instantiation_dict": { + "product": "a mattress foundation", + "time": "around Feb or March 2023" + }, + "intent": "I previously ordered some a mattress foundation around Feb or March 2023 and later cancelled. 
Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07DFJ5XKH" + ] + } + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "shopping" + ], + "task_id": 437, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?", + "instantiation_dict": { + "product": "a table lamp", + "time": "in May 2023" + }, + "intent": "I previously ordered some a table lamp in May 2023 and later cancelled. Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B072XS3F6W" + ] + } + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "shopping" + ], + "task_id": 438, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?", + "instantiation_dict": { + "product": "a TV stand", + "time": "sometime around sep 2022" + }, + "intent": "I previously ordered some a TV stand sometime around sep 2022 and later cancelled. 
Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B08PVHRRB7" + ] + } + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "shopping" + ], + "task_id": 439, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?", + "instantiation_dict": { + "product": "a cat t-shirt", + "time": "during 2022" + }, + "intent": "I previously ordered some a cat t-shirt during 2022 and later cancelled. Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B0844BWS76" + ] + } + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "shopping" + ], + "task_id": 440, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?", + "instantiation_dict": { + "product": "a make up removal kit", + "time": "during summer 2022" + }, + "intent": "I previously ordered some a make up removal kit during summer 2022 and later cancelled. 
Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B0738JQG6Q" + ] + } + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 441, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "GIVE ME SPACE" + }, + "intent": "Update the project site's title to \"GIVE ME SPACE\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": { + "must_include": [ + "GIVE ME SPACE" + ] + } + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 442, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "Welcome to my site" + }, + "intent": "Update the project site's title to \"Welcome to my site\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": { + "must_include": [ + "Welcome to my site" + ] + } + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + 
], + "task_id": 443, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "Not an interesting site" + }, + "intent": "Update the project site's title to \"Not an interesting site\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": { + "must_include": [ + "Not an interesting site" + ] + } + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 444, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "Title Wanted" + }, + "intent": "Update the project site's title to \"Title Wanted\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": { + "must_include": [ + "Title Wanted" + ] + } + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 445, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "Hello" + }, + "intent": "Update the project site's title to \"Hello\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + 
"reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": { + "must_include": [ + "Hello" + ] + } + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 446, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} in {{repo}} to {{account}}.", + "instantiation_dict": { + "repo": "a11yproject", + "issue": 404, + "account": "Roshanjossey" + }, + "intent": "Assign the issue regarding 404 in a11yproject to Roshanjossey.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=Roshanjossey", + "locator": "", + "required_contents": { + "must_include": [ + "404s, bad host, timeouts, bad urls for URLs linked from website" + ] + } + } + ] + }, + "intent_template_id": 999 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 447, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} in {{repo}} to {{account}}.", + "instantiation_dict": { + "repo": "a11y-webring.club", + "issue": "linking to an accessibility statement", + "account": "Rohan" + }, + "intent": "Assign the issue regarding linking to an accessibility statement in a11y-webring.club to Rohan.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=Seirdy", + "locator": "", + "required_contents": { + "must_include": [ + "linking to an accessibility 
statement" + ] + } + } + ] + }, + "intent_template_id": 999 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 448, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "https://egg.tart.com" + }, + "intent": "set the homepage URL on my GitLab profile to https://egg.tart.com", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header [itemprop=\"url\"]').outerText", + "required_contents": { + "exact_match": "egg.tart.com" + } + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 449, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "https://helloworld.xyz" + }, + "intent": "set the homepage URL on my GitLab profile to https://helloworld.xyz", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header [itemprop=\"url\"]').outerText", + "required_contents": { + "exact_match": "helloworld.xyz" + } + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 450, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "a11yproject.contributor.me" + }, + "intent": "set the homepage URL on my GitLab 
profile to a11yproject.contributor.me", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header [itemprop=\"url\"]').outerText", + "required_contents": { + "exact_match": "a11yproject.contributor.me" + } + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 451, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "www.byteblaze.com" + }, + "intent": "set the homepage URL on my GitLab profile to www.byteblaze.com", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header [itemprop=\"url\"]').outerText", + "required_contents": { + "exact_match": "www.byteblaze.com" + } + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 452, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "byteblaze.github.io" + }, + "intent": "set the homepage URL on my GitLab profile to byteblaze.github.io", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header [itemprop=\"url\"]').outerText", + "required_contents": { + "exact_match": "byteblaze.github.io" + } + } + ] + }, + "intent_template_id": 331 + }, + { 
+ "sites": [ + "shopping_admin" + ], + "task_id": 453, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "Teton pullover hoodie" + }, + "intent": "Disable Teton pullover hoodie from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/78/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": { + "exact_match": "2" + } + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 454, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "Ryker Tee Crew Neck" + }, + "intent": "Disable Ryker Tee Crew Neck from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/478/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": { + "exact_match": "2" + } + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 455, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + 
"instantiation_dict": { + "product": "lHelios Endurance Tank" + }, + "intent": "Disable lHelios Endurance Tank from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/676/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": { + "exact_match": "2" + } + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 456, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "Cora Pant" + }, + "intent": "Disable Cora Pant from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1840/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": { + "exact_match": "2" + } + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 457, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "Karmen yoga pants" + }, + "intent": "Disable Karmen yoga pants from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"__SHOPPING_ADMIN__/catalog/product/edit/id/1819/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": { + "exact_match": "2" + } + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 458, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1481/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "$5", + "action": "Reduce" + }, + "intent": "Reduce the price of this product by $5", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1481/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "27.00" + } + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 459, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/237/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "10%", + "action": "Reduce" + }, + "intent": "Reduce the price of this product by 10%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/237/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "62.10" + } + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 460, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + 
"start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/418/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "15%", + "action": "Reduce" + }, + "intent": "Reduce the price of this product by 15%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/418/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "38.25" + } + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 461, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/721/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "$11.5", + "action": "Increase" + }, + "intent": "Increase the price of this product by $11.5", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/721/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "29.50" + } + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 462, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/710/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "10%", + "action": "Increase" + }, + "intent": "Increase the price of this product by 10%", + "require_reset": false, + "eval": { + "eval_types": [ + 
"program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/710/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "19.80" + } + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 463, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/996/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "15%", + "action": "Increase" + }, + "intent": "Increase the price of this product by 15%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/996/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "36.80" + } + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 464, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update the product description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Antonia Racer Tank" + }, + "intent": "Update the product description of Antonia Racer Tank to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/../antonia-racer-tank.html", + "locator": "document.querySelector('.data.item.content').outerText + 
(document.querySelector('.product.attribute.overview [itemprop=\"description\"]')?.outerText || '')", + "required_contents": { + "must_include": [ + "This is in regular rotation at the gym", + "Its colorful and looks kinda cute under my exercise tanks", + "it's very stylish for yoga or something else low impact" + ] + } + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping" + ], + "task_id": 465, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "Tide PODS Spring Meadow Scent HE Turbo Laundry Detergent Pacs, 81 Count" + }, + "intent": "Add Tide PODS Spring Meadow Scent HE Turbo Laundry Detergent Pacs, 81 Count to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "Tide PODS Spring Meadow Scent HE Turbo Laundry Detergent Pacs, 81 Count" + ] + } + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping" + ], + "task_id": 466, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts" + }, + "intent": "Add 2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + 
"required_contents": { + "must_include": [ + "2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts" + ] + } + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping" + ], + "task_id": 467, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "HONGJ Hawaiian Beach Outfits Set for Mens, Summer Tropical Tree Printed Relaxed-fit Hawaii Shirts Shorts 2 Piece Suits" + }, + "intent": "Add HONGJ Hawaiian Beach Outfits Set for Mens, Summer Tropical Tree Printed Relaxed-fit Hawaii Shirts Shorts 2 Piece Suits to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "HONGJ Hawaiian Beach Outfits Set for Mens, Summer Tropical Tree Printed Relaxed-fit Hawaii Shirts Shorts 2 Piece Suits" + ] + } + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping" + ], + "task_id": 468, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "DkRgVNY Lace Spcling Lingerie Womens Sexy Hollow Out Underwear Bodysuit One Piece Snap Crotch Clubwear Teddy Bodysuit" + }, + "intent": "Add DkRgVNY Lace Spcling Lingerie Womens Sexy Hollow Out Underwear Bodysuit One Piece Snap Crotch Clubwear Teddy Bodysuit to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": 
"document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "DkRgVNY Lace Spcling Lingerie Womens Sexy Hollow Out Underwear Bodysuit One Piece Snap Crotch Clubwear Teddy Bodysuit" + ] + } + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping" + ], + "task_id": 469, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "Light Blue Simple Summer New Low Heels Slippers for Women Fashion Chunky Heels Pointed Toe Wine Glasses Sandals Comfortable Walking Shoes Ladies All-Match Sexy Party Shoes" + }, + "intent": "Add Light Blue Simple Summer New Low Heels Slippers for Women Fashion Chunky Heels Pointed Toe Wine Glasses Sandals Comfortable Walking Shoes Ladies All-Match Sexy Party Shoes to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "Light Blue Simple Summer New Low Heels Slippers for Women Fashion Chunky Heels Pointed Toe Wine Glasses Sandals Comfortable Walking Shoes Ladies All-Match Sexy Party Shoes" + ] + } + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 470, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "302" + }, + "intent": "Cancel order 302", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"__SHOPPING_ADMIN__/sales/order/view/order_id/302/", + "locator": "document.querySelector(\"#order_status\").outerText", + "required_contents": { + "exact_match": "Canceled" + } + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 471, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "307" + }, + "intent": "Cancel order 307", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/307/", + "locator": "document.querySelector(\"#order_status\").outerText", + "required_contents": { + "exact_match": "Canceled" + } + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 472, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "299" + }, + "intent": "Cancel order 299", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/299/", + "locator": "document.querySelector(\"#order_status\").outerText", + "required_contents": { + "exact_match": "Canceled" + } + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 473, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "301" + }, + "intent": "Cancel order 301", + "require_reset": false, + "eval": { 
+ "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/301/", + "locator": "document.querySelector(\"#order_status\").outerText", + "required_contents": { + "exact_match": "Canceled" + } + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 474, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "305" + }, + "intent": "Cancel order 305", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/305/", + "locator": "document.querySelector(\"#order_status\").outerText", + "required_contents": { + "exact_match": "Canceled" + } + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 475, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "chatgpt_plugin" + }, + "intent": "Set up a new, empty repository with the name chatgpt_plugin?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/chatgpt_plugin", + "locator": "", + "required_contents": { + "must_include": [ + "chatgpt_plugin" + ] + } + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 476, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": 
"Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "awesome_llm_reading" + }, + "intent": "Set up a new, empty repository with the name awesome_llm_reading?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/awesome_llm_reading", + "locator": "", + "required_contents": { + "must_include": [ + "awesome_llm_reading" + ] + } + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 477, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "awesome_program_aided_reasoning" + }, + "intent": "Set up a new, empty repository with the name awesome_program_aided_reasoning?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/awesome_program_aided_reasoning", + "locator": "", + "required_contents": { + "must_include": [ + "awesome_program_aided_reasoning" + ] + } + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 478, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "webagent" + }, + "intent": "Set up a new, empty repository with the name webagent?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/webagent", + "locator": "", + 
"required_contents": { + "must_include": [ + "webagent" + ] + } + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 479, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "awesome_webagent" + }, + "intent": "Set up a new, empty repository with the name awesome_webagent?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/awesome_webagent", + "locator": "", + "required_contents": { + "must_include": [ + "awesome_webagent" + ] + } + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 480, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}}", + "instantiation_dict": { + "collaborator_account_list": "yjlou", + "repo": "solarized-prism-theme" + }, + "intent": "Invite yjlou as collaborator to solarized-prism-theme", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/solarized-prism-theme/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "yjlou" + ] + } + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 481, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. 
Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "Abishek" + }, + "intent": "Abishek wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'abisubramanya27')", + "required_contents": { + "must_include": [ + "Guest" + ] + } + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 482, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "yjlou" + }, + "intent": "yjlou wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'yjlou')", + "required_contents": { + "must_include": [ + "Guest" + ] + } + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 483, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "Koushik" + }, + "intent": "Koushik wants to check my dotfile configurations. 
Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'koush')", + "required_contents": { + "must_include": [ + "Guest" + ] + } + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 484, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "Jakub Klinkovsk\u00fd" + }, + "intent": "Jakub Klinkovsk\u00fd wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'lahwaacz')", + "required_contents": { + "must_include": [ + "Guest" + ] + } + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 485, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "Vinta" + }, + "intent": "Vinta wants to check my dotfile configurations. 
Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'vinta')", + "required_contents": { + "must_include": [ + "Guest" + ] + } + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 486, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "404 Not Found", + "heading": "Bruh bro you clicked the wrong page" + }, + "intent": "Change the page title of \"404 Not Found\" page on my site to \"Bruh bro you clicked the wrong page\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/cms/page/edit/page_id/1/", + "locator": "document.querySelector('input[name=\"title\"').value", + "required_contents": { + "exact_match": "Bruh bro you clicked the wrong page" + } + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 487, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "Enable Cookies", + "heading": "Cookie monster coming to your place" + }, + "intent": "Change the page title of \"Enable Cookies\" page on my site to \"Cookie monster coming to your place\".", + "require_reset": false, + "eval": { + "eval_types": [ + 
"program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/cms/page/edit/page_id/3/", + "locator": "document.querySelector('input[name=\"title\"').value", + "required_contents": { + "exact_match": "Cookie monster coming to your place" + } + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 488, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "Home Page", + "heading": "This is the home page!! Leave here!!" + }, + "intent": "Change the page title of \"Home Page\" page on my site to \"This is the home page!! Leave here!!\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/cms/page/edit/page_id/2/", + "locator": "document.querySelector('input[name=\"title\"').value", + "required_contents": { + "exact_match": "This is the home page!! Leave here!!" 
+ } + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 489, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "Privacy Policy", + "heading": "No privacy policy is needed is this dystopian world" + }, + "intent": "Change the page title of \"Privacy Policy\" page on my site to \"No privacy policy is needed is this dystopian world\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/cms/page/edit/page_id/4/", + "locator": "document.querySelector('input[name=\"title\"').value", + "required_contents": { + "exact_match": "No privacy policy is needed is this dystopian world" + } + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 490, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "About us", + "heading": "Secret" + }, + "intent": "Change the page title of \"About us\" page on my site to \"Secret\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/cms/page/edit/page_id/5/", + "locator": "document.querySelector('input[name=\"title\"').value", + "required_contents": { + "exact_match": "Secret" + } + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 491, + "require_login": true, + 
"storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Sarah Miller", + "message": "the order is ready to be shipped soon!" + }, + "intent": "Notify Sarah Miller in their most recent pending order with message \"the order is ready to be shipped soon!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "System message: We cannot add order history." + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 492, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Jane Doe", + "message": "sorry we are out of stock, please reorder" + }, + "intent": "Notify Jane Doe in their most recent pending order with message \"sorry we are out of stock, please reorder\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/302/", + "locator": "document.querySelector(\"#order_history_block\").querySelector(\".note-list\").firstElementChild.querySelector(\".note-list-comment\").outerText", + "required_contents": { + "exact_match": "sorry we are out of stock, please reorder" + } + } + ] + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 493, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + 
"intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Grace Nguyen", + "message": "sorry we are bankrupt, please contact our customer service for refund" + }, + "intent": "Notify Grace Nguyen in their most recent pending order with message \"sorry we are bankrupt, please contact our customer service for refund\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/307/", + "locator": "document.querySelector(\"#order_history_block\").querySelector(\".note-list\").firstElementChild.querySelector(\".note-list-comment\").outerText", + "required_contents": { + "exact_match": "sorry we are bankrupt, please contact our customer service for refund" + } + } + ] + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 494, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Alex Thomas", + "message": "Yo, your order will be shipped soon!" + }, + "intent": "Notify Alex Thomas in their most recent pending order with message \"Yo, your order will be shipped soon!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/304/", + "locator": "document.querySelector(\"#order_history_block\").querySelector(\".note-list\").firstElementChild.querySelector(\".note-list-comment\").outerText", + "required_contents": { + "exact_match": "Yo, your order will be shipped soon!" 
+ } + } + ] + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 495, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Lily Potter", + "message": "Thanks, your order is ready to be shipped!" + }, + "intent": "Notify Lily Potter in their most recent pending order with message \"Thanks, your order is ready to be shipped!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/303/", + "locator": "document.querySelector(\"#order_history_block\").querySelector(\".note-list\").firstElementChild.querySelector(\".note-list-comment\").outerText", + "required_contents": { + "exact_match": "Thanks, your order is ready to be shipped!" 
+ } + } + ] + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 496, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "8974568499", + "order": "299", + "service": "Federal Express" + }, + "intent": "Update order #299 with the Federal Express tracking number 8974568499", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/commentsHistory/order_id/299/active_tab/order_shipments/", + "locator": "", + "required_contents": { + "must_include": [ + "Tracking number 8974568499 for Federal Express assigned" + ] + } + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 497, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "24353446464", + "order": "307", + "service": "DHL" + }, + "intent": "Update order #307 with the DHL tracking number 24353446464", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/commentsHistory/order_id/307/active_tab/order_shipments/", + "locator": "", + "required_contents": { + "must_include": [ + "Tracking number 24353446464 for DHL assigned" + ] + } + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 498, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", 
+ "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "55591023930", + "order": "306", + "service": "UPS" + }, + "intent": "Update order #306 with the UPS tracking number 55591023930", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/commentsHistory/order_id/306/active_tab/order_shipments/", + "locator": "", + "required_contents": { + "must_include": [ + "Tracking number 55591023930 for United Parcel Service assigned" + ] + } + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 499, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "13849373987", + "order": "304", + "service": "USPS" + }, + "intent": "Update order #304 with the USPS tracking number 13849373987", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/commentsHistory/order_id/304/active_tab/order_shipments/", + "locator": "", + "required_contents": { + "must_include": [ + "Tracking number 13849373987 for United States Postal Service assigned" + ] + } + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 500, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + 
"instantiation_dict": { + "tracking": "239028439840", + "order": "301", + "service": "DHL" + }, + "intent": "Update order #301 with the DHL tracking number 239028439840", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/commentsHistory/order_id/301/active_tab/order_shipments/", + "locator": "", + "required_contents": { + "must_include": [ + "Tracking number 239028439840 for DHL assigned" + ] + } + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 501, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "Taurus Elements Shell" + }, + "intent": "Make all Taurus Elements Shell as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/350/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": { + "exact_match": "0" + } + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 502, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "Gobi HeatTec Tee" + }, + "intent": "Make all Gobi HeatTec Tee as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/446/", + 
"locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": { + "exact_match": "0" + } + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 503, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "rocco gym tank" + }, + "intent": "Make all rocco gym tank as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/682/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": { + "exact_match": "0" + } + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 504, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "Selene yoga hoodie" + }, + "intent": "Make all Selene yoga hoodie as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1108/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": { + "exact_match": "0" + } + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 505, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + 
"intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "Aeno capri" + }, + "intent": "Make all Aeno capri as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1861/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": { + "exact_match": "0" + } + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping" + ], + "task_id": 506, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "meat substitute", + "dollar_value": "between 100 and 200" + }, + "intent": "Buy the highest rated product from the meat substitute category within a budget between 100 and 200.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B01CTR3DLE" + ] + } + } + ] + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 507, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "Ceiling light", + "dollar_value": "above 1000" + }, + "intent": "Buy the highest rated product from the Ceiling light 
category within a budget above 1000.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07BVL3P1V" + ] + } + } + ] + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 508, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "NS switch pouch", + "dollar_value": "under 60" + }, + "intent": "Buy the highest rated product from the NS switch pouch category within a budget under 60.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B07116LGP6" + ] + } + } + ] + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 509, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the best rating product from \"{{category}}\" category with at least 5 reviews and the product is least expensive", + "instantiation_dict": { + "category": "Men's shoe" + }, + "intent": "Buy the best rating product from \"Men's shoe\" category with at least 5 reviews and the product is least expensive", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + 
"program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B01J4MM3KO" + ] + } + } + ] + }, + "intent_template_id": 216 + }, + { + "sites": [ + "shopping" + ], + "task_id": 510, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the best rating product from \"{{category}}\" category with at least 5 reviews and the product is least expensive", + "instantiation_dict": { + "category": "Home Audio Speaker" + }, + "intent": "Buy the best rating product from \"Home Audio Speaker\" category with at least 5 reviews and the product is least expensive", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B002R5ABIW" + ] + } + } + ] + }, + "intent_template_id": 216 + }, + { + "sites": [ + "shopping" + ], + "task_id": 511, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "laundry detergent" + }, + "intent": "Add a laundry detergent to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "laundry", + "detergent" + ] + } + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 
512, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "toothpaste" + }, + "intent": "Add a toothpaste to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "toothpaste" + ] + } + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 513, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "chair" + }, + "intent": "Add a chair to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "chair" + ] + } + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 514, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "white desk" + }, + "intent": "Add a white desk to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": 
"document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "white", + "desk" + ] + } + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 515, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "white computer desk" + }, + "intent": "Add a white computer desk to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "white", + "computer", + "desk" + ] + } + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 516, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/elmwood-inn-fine-teas-orange-vanilla-caffeine-free-fruit-infusion-16-ounce-pouch.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "Elmwood Inn Fine Teas, Orange Vanilla Caffeine-free Fruit Infusion, 16-Ounce Pouch" + ] + } + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 517, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"__SHOPPING__/skinit-decal-gaming-skin-compatible-with-xbox-one-s-console-and-controller-bundle-officially-licensed-nfl-baltimore-ravens-design.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "Skinit Decal Gaming Skin Compatible with Xbox One S Console and Controller Bundle - Officially Licensed NFL Baltimore Ravens Design" + ] + } + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 518, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sceptre-e195bd-srr-19-inch-720p-led-tv-true-black-2017.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "Sceptre E195BD-SRR 19-Inch 720P LED TV, True Black (2017)" + ] + } + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 519, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/iphone-13-pro-max-case-neon-turtle-iphone-13-pro-max-cases-tempered-glass-back-soft-silicone-tpu-shock-protective-case-for-apple-iphone-13-pro-max.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + 
"instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "iPhone 13 Pro Max Case, Neon Turtle iPhone 13 Pro Max Cases, Tempered Glass Back+Soft Silicone TPU Shock Protective Case for Apple iPhone 13 Pro Max" + ] + } + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 520, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/magnetic-metal-stainless-steel-d-pads-kits-directional-pad-replacement-parts-for-xbox-one-elite-controller-elite-series-2-xbox-one-xbox-one-s-x-controller.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "Magnetic Metal Stainless Steel D-pads Kits Directional Pad Replacement Parts for Xbox One Elite Controller, Elite Series 2, Xbox One, Xbox One S/X Controller" + ] + } + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 521, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Subscribe to the newsletter of OneStopMarket", + "instantiation_dict": {}, + "intent": "Subscribe to the newsletter of OneStopMarket", + "require_reset": false, + "eval": { + "eval_types": [ + 
"program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/newsletter/manage/", + "locator": "document.querySelector('[title=\"General Subscription\"').checked.toString()", + "required_contents": { + "exact_match": "true" + } + } + ] + }, + "intent_template_id": 199 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 522, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "all repos from facebook" + }, + "intent": "Fork all repos from facebook.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/dashboard/projects", + "locator": "document.querySelector('[data-qa-selector=\"projects_list\"').outerText", + "required_contents": { + "must_include": [ + "create-react-app", + "buck" + ] + } + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 523, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "five" + }, + "intent": "Star the top five most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": { + "must_include": [ + "AndroidSlidingUpPanel", + "create-react-app", + "ffmpeg-python", + "PHP_XLSXWriter", + "AndroidAsync" + ] + } + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 524, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": 
"__GITLAB__", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "eight" + }, + "intent": "Star the top eight most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": { + "must_include": [ + "AndroidSlidingUpPanel", + "create-react-app", + "ffmpeg-python", + "PHP_XLSXWriter", + "AndroidAsync", + "Pytorch-GAN", + "administrate", + "keycloak" + ] + } + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 525, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "four" + }, + "intent": "Star the top four most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": { + "must_include": [ + "AndroidSlidingUpPanel", + "create-react-app", + "ffmpeg-python", + "PHP_XLSXWriter" + ] + } + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 526, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "three" + }, + "intent": "Star the top three most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": { + "must_include": [ + "AndroidSlidingUpPanel", + "create-react-app", + "ffmpeg-python" + ] + } + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 527, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "one" + }, + "intent": "Star the top one most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": { + "must_include": [ + "AndroidSlidingUpPanel" + ] + } + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "shopping" + ], + "task_id": 528, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "phone screen protector", + "time": "March 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the phone screen protector I bought March 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. 
Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "000000180", + "12.99" + ] + } + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "shopping" + ], + "task_id": 529, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "bluetooth speaker", + "time": "Feb 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the bluetooth speaker I bought Feb 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "000000148", + "169.95" + ] + } + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "shopping" + ], + "task_id": 530, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. 
The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "kitchen organizer", + "time": "around Feb 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the kitchen organizer I bought around Feb 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "000000161", + "68.88" + ] + } + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "shopping" + ], + "task_id": 531, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "phone case", + "time": "March 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the phone case I bought March 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. 
Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "000000180", + "$12.99" + ] + } + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "shopping" + ], + "task_id": 532, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "PS3 remote controller", + "time": "early 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the PS3 remote controller I bought early 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. 
Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "000000180", + "1.63" + ] + } + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 533, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "convexegg", + "yjlou" + ] + }, + "intent": "Follow ['convexegg', 'yjlou'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": { + "must_include": [ + "@convexegg", + "@yjlou" + ] + } + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 534, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "Jakub Klinkovsk\u00fd", + "Koushik", + "Vinta Chen" + ] + }, + "intent": "Follow ['Jakub Klinkovsk\u00fd', 'Koushik', 'Vinta Chen'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@koush", + "@vinta" + ] + } 
+ } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 535, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "Jakub K", + "ghost", + "Beno\u00eet Blanchon" + ] + }, + "intent": "Follow ['Jakub K', 'ghost', 'Beno\u00eet Blanchon'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@ghost", + "@bblanchon" + ] + } + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 536, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "ghost", + "R1kk3r", + "Abishek" + ] + }, + "intent": "Follow ['ghost', 'R1kk3r', 'Abishek'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@R1kk3r", + "@abisubramanya27" + ] + } + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 537, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "Jakub Klinkovsk", + "convexegg", + "Vinta 
Chen", + "yjlou", + "Abishek S" + ] + }, + "intent": "Follow ['Jakub Klinkovsk', 'convexegg', 'Vinta Chen', 'yjlou', 'Abishek S'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@convexegg", + "@vinta", + "@yjlou", + "@abisubramanya27" + ] + } + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 538, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "299", + "address": "456 Oak Avenue, Apartment 5B, New York, NY, 10001" + }, + "intent": "Modify the address of order #299 to 456 Oak Avenue, Apartment 5B, New York, NY, 10001", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/299", + "locator": "", + "required_contents": { + "must_include": [ + "456 Oak Avenue", + "Apartment 5B", + "New York", + "10001" + ] + } + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 539, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "65", + "address": "789 Pine Lane, San Francisco, CA, 94102" + }, + "intent": "Modify the address of order #65 to 789 Pine Lane, San Francisco, CA, 94102", + "require_reset": false, + "eval": { + 
"eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/65", + "locator": "", + "required_contents": { + "must_include": [ + "789 Pine Lane", + "San Francisco", + "California", + "94102" + ] + } + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 540, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "301", + "address": "321 Birch Boulevard, Suite 200, Dallas, TX, 75201" + }, + "intent": "Modify the address of order #301 to 321 Birch Boulevard, Suite 200, Dallas, TX, 75201", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/301", + "locator": "", + "required_contents": { + "must_include": [ + "321 Birch Boulevard", + "Suite 200", + "Dallas", + "Texas", + "75201" + ] + } + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 541, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "125", + "address": "654 Elm Drive, Apartment 12, Miami, FL, 33101" + }, + "intent": "Modify the address of order #125 to 654 Elm Drive, Apartment 12, Miami, FL, 33101", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/125", + "locator": "", + 
"required_contents": { + "must_include": [ + "654 Elm Drive", + "Apartment 12", + "Miami", + "Florida", + "33101" + ] + } + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 542, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "300", + "address": "987 Cedar Court, Los Angeles, CA, 90012" + }, + "intent": "Modify the address of order #300 to 987 Cedar Court, Los Angeles, CA, 90012", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/300", + "locator": "", + "required_contents": { + "must_include": [ + "987 Cedar Court", + "Los Angeles", + "California", + "90012" + ] + } + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 543, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update the product description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Bella Tank" + }, + "intent": "Update the product description of Bella Tank to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/../bella-tank.html", + "locator": "document.querySelector('.data.item.content').outerText + (document.querySelector('.product.attribute.overview [itemprop=\"description\"]')?.outerText || '')", + "required_contents": { + "must_include": [ + "Good 
choice for working out and stylin' enough to wear when I'm hanging with friends on hot days", + "Also washes really well", + "Always a sweet n sporty look for the gym", + "Keeps me cool and the seams don't rub up against me like some of my other tanks" + ] + } + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 544, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update the description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Selena Yoga Hoodie" + }, + "intent": "Update the description of Selena Yoga Hoodie to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/../selene-yoga-hoodie.html", + "locator": "document.querySelector('.data.item.content').outerText + (document.querySelector('.product.attribute.overview [itemprop=\"description\"]')?.outerText || '')", + "required_contents": { + "must_include": [ + "I was super cold and it did the job.", + "The sleeves are definitely thicker than you realize, which is a good thing", + "really quite substantial", + "planning on buying another one of these in another color", + "the best hoodie ive ever owned" + ] + } + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 545, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update the description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Radiant Tee" + }, + "intent": "Update the description of Radiant 
Tee to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/../radiant-tee.html", + "locator": "document.querySelector('.data.item.content').outerText + (document.querySelector('.product.attribute.overview [itemprop=\"description\"]')?.outerText || '')", + "required_contents": { + "must_include": [ + "What I rally love here is that it does the job of keeping me cool and dry", + "I'm a big guy and sweat A LOT", + "Even after a day of gulf, I'm still dry and comfortable", + "What a versatile shirt", + "Not only does it feel very soft compared to my old worn out polos, but it also does the job promised", + "I like going out after my game for drinks so I look good then too and don't need to change into something fresh" + ] + } + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 546, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update the description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Lucia Cross-Fit Bra" + }, + "intent": "Update the description of Lucia Cross-Fit Bra to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/../affirm-water-bottle.html", + "locator": "document.querySelector('.data.item.content').outerText + (document.querySelector('.product.attribute.overview [itemprop=\"description\"]')?.outerText || '')", + "required_contents": { + "must_include": [ + "Wide mouth opening makes it easy to clean" + ] + } + } + ] + }, 
+ "intent_template_id": 251 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 547, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a new {{option}} option {{value}} to the {{base_setting}} of {{product}}", + "instantiation_dict": { + "option": "color", + "value": "brown", + "base_setting": "size S", + "product": "Phoebe Zipper Sweatshirt" + }, + "intent": "Add a new color option brown to the size S of Phoebe Zipper Sweatshirt", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1130/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": { + "must_include": [ + "Phoebe Zipper Sweatshirt-S-Brown" + ] + } + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 548, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a new {{option}} {{value}} to {{base_setting}} of {{product}}", + "instantiation_dict": { + "option": "color", + "value": "blue", + "base_setting": "size S and M", + "product": "Frankie Sweatshirt" + }, + "intent": "Add a new color blue to size S and M of Frankie Sweatshirt", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/110/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": { + "must_include": [ + "Sweatshirt-M-Blue", + "Sweatshirt-S-Blue" + ] + } + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 549, + 
"require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a new {{option}} {{value}} to {{base_setting}} {{product}}", + "instantiation_dict": { + "option": "size", + "value": "XXXL", + "base_setting": "green", + "product": "Minerva LumaTech V-Tee" + }, + "intent": "Add a new size XXXL to green Minerva LumaTech V-Tee", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1492/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": { + "must_include": [ + "V-Tee-XXXL-Green" + ] + } + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 550, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a new {{option}} {{value}} to {{base_setting}} {{product}}", + "instantiation_dict": { + "option": "size", + "value": "XXS", + "base_setting": "blue and purple", + "product": "Nona Fitness Tank" + }, + "intent": "Add a new size XXS to blue and purple Nona Fitness Tank", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1732/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": { + "must_include": [ + "Tank-XXS-Blue", + "Tank-XXS-Purple" + ] + } + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 551, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": 
"Add new {{option}} {{value}} to {{base_setting}} of {{product}}", + "instantiation_dict": { + "option": "size", + "value": "30 and 31", + "base_setting": "all color variants", + "product": "Diana Tights" + }, + "intent": "Add new size 30 and 31 to all color variants of Diana Tights", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1854/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": { + "must_include": [ + "Tights-30-Blue", + "Tights-30-Black", + "Tights-30-Orange", + "Tights-31-Blue", + "Tights-31-Black", + "Tights-31-Orange" + ] + } + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 552, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a folder named {{directory}} in {{gitlab_repo}} repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the {{subreddit}}?", + "instantiation_dict": { + "directory": "real_space", + "subreddit": "space", + "gitlab_repo": "gimmiethat.space" + }, + "intent": "Create a folder named real_space in gimmiethat.space repo. 
Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the space?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/real_space/urls.txt", + "locator": "", + "required_contents": { + "must_include": [ + "__REDDIT__/f/space/134164/scientists-erupt-at-nasa-gutting-funding-for-crucial-venus", + "__REDDIT__/f/space/134163/virgin-orbit-fails-to-secure-funding-will-cease-operations", + "__REDDIT__/f/space/134162/nasa-to-name-artemis-2-crew-next-week-the-first-moon", + "__REDDIT__/f/space/134161/bent-light-in-deep-space-reveals-one-of-the-biggest-black", + "__REDDIT__/f/space/134160/seti-s-new-machine-learning-algorithm-works-like-google-s" + ] + } + } + ] + }, + "intent_template_id": 84 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 553, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a folder named {{directory}} in {{gitlab_repo}} repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the {{subreddit}}?", + "instantiation_dict": { + "directory": "news", + "gitlab_repo": "gimmiethat.space", + "subreddit": "news related subreddits" + }, + "intent": "Create a folder named news in gimmiethat.space repo. 
Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the news related subreddits?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/news/urls.txt", + "locator": "", + "required_contents": { + "must_include": [ + "__REDDIT__/f/news/129905/ohio-man-charged-for-using-molotov-cocktails-to-attack", + "__REDDIT__/f/news/129904/in-a-loss-for-fox-news-judge-allows-dominion-s-defamation", + "__REDDIT__/f/news/129903/theater-group-sues-to-block-tennessee-s-new-anti-drag-law", + "__REDDIT__/f/news/129902/andrew-tate-released-from-jail-in-romania-and-placed-under", + "__REDDIT__/f/news/129901/rare-high-risk-storm-alert-issued-for-parts-of-midwest-and" + ] + } + } + ] + }, + "intent_template_id": 84 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 554, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a folder named {{directory}} in {{gitlab_repo}} repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the {{subreddit}}?", + "instantiation_dict": { + "directory": "moive_space", + "gitlab_repo": "gimmiethat.space", + "subreddit": "movies" + }, + "intent": "Create a folder named moive_space in gimmiethat.space repo. 
Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the movies?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/moive_space/urls.txt", + "locator": "", + "required_contents": { + "must_include": [ + "__REDDIT__/f/movies/128825/scenes-in-film-that-feel-off-or-wrong-in-some-way-and-make", + "__REDDIT__/f/movies/128824/disney-s-live-action-lilo-amp-stitch-movie-finds-its-lilo-in", + "__REDDIT__/f/movies/128823/fantastic-four-movie-gets-new-writer-with-avatar-the-way-of", + "__REDDIT__/f/movies/128822/can-someone-explain-what-made-steven-seagal-so-appealing-for", + "__REDDIT__/f/movies/128821/ban-on-fetish-sex-depictions-in-film-should-end-australia" + ] + } + } + ] + }, + "intent_template_id": 84 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 555, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a folder named {{directory}} in {{gitlab_repo}} repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the {{subreddit}}?", + "instantiation_dict": { + "directory": "funny_pic", + "gitlab_repo": "gimmiethat.space", + "subreddit": "memes" + }, + "intent": "Create a folder named funny_pic in gimmiethat.space repo. 
Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the memes?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/funny_pic/urls.txt", + "locator": "", + "required_contents": { + "must_include": [ + "__REDDIT__/f/memes/127991/it-do-be-like-that-tho", + "__REDDIT__/f/memes/127990/thank-you-memers-this-wouldn-t-be-possible-without-you", + "__REDDIT__/f/memes/127989/if-you-have-no-other-choice", + "__REDDIT__/f/memes/127988/yes-yes-yes", + "__REDDIT__/f/memes/127987/shagadelic-baby" + ] + } + } + ] + }, + "intent_template_id": 84 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 556, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_honest_fans", + "topics": "movies directed by Christopher Nolan" + }, + "intent": "Create a repo named nolan_honest_fans with movies directed by Christopher Nolan in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "Following", + "Memento", + "Insomnia", + "Batman Begins", + "The Prestige", + "The Dark Knight", + "Inception", + "The Dark Knight Rises", + "Interstellar", + "Dunkirk", + "Tenet", + "Oppenheimer" + ] + } + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 557, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": 
"Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_old_fans", + "topics": "movies directed by Christopher Nolan before 2010" + }, + "intent": "Create a repo named nolan_old_fans with movies directed by Christopher Nolan before 2010 in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/nolan_old_fans/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "Following", + "Memento", + "Insomnia", + "Batman Begins", + "The Prestige", + "The Dark Knight" + ] + } + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 558, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_young_fans", + "topics": "movies directed by Christopher Nolan after 2010" + }, + "intent": "Create a repo named nolan_young_fans with movies directed by Christopher Nolan after 2010 in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/nolan_young_fans/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "Inception", + "The Dark Knight Rises", + "Interstellar", + "Dunkirk", + "Tenet", + "Oppenheimer" + ] + } + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 559, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + 
"instantiation_dict": { + "name": "nolan_followers", + "topics": "career timeline of Christopher Nolan" + }, + "intent": "Create a repo named nolan_followers with career timeline of Christopher Nolan in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/nolan_followers/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "1993\u20132003: Early career and breakthrough", + "2003\u20132013: Widespread recognition", + "2014\u20132019: Established Hollywood auteur", + "2020\u2013present" + ] + } + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 560, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_academy_awards", + "topics": "movies that won Academy Awards by Christopher Nolan" + }, + "intent": "Create a repo named nolan_academy_awards with movies that won Academy Awards by Christopher Nolan in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/nolan_academy_awards/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "The Dark Knight", + "Inception", + "Interstellar", + "Dunkirk", + "Tenet" + ] + } + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 561, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": 
"bafta_awards_nolan", + "topics": "movies that are nominated BAFTA Awards by Christopher Nolan" + }, + "intent": "Create a repo named bafta_awards_nolan with movies that are nominated BAFTA Awards by Christopher Nolan in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/bafta_awards_nolan/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "Batman Begins", + "The Dark Knight", + "Inception", + "The Dark Knight Rises", + "Interstellar", + "Dunkirk", + "Tenet" + ] + } + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 562, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "Awesome_DIY_ideas", + "num": 6 + }, + "intent": "create a repository named Awesome_DIY_ideas that includes a README file with the links to the most active 6 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/Awesome_DIY_ideas/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "__REDDIT__/f/DIY/118903/separate-glued-plastic-parts", + "__REDDIT__/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess", + "__REDDIT__/f/DIY/118935/basement-bulkhead-soffit-wall-framing", + "__REDDIT__/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit", + "__REDDIT__/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches", + "__REDDIT__/f/DIY/118931/afci-outlet-question" + ] + } + } + ] + }, + 
"intent_template_id": 88 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 563, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "fun_thing_to_do", + "num": 5 + }, + "intent": "create a repository named fun_thing_to_do that includes a README file with the links to the most active 5 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/fun_thing_to_do/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "__REDDIT__/f/DIY/118903/separate-glued-plastic-parts", + "__REDDIT__/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess", + "__REDDIT__/f/DIY/118935/basement-bulkhead-soffit-wall-framing", + "__REDDIT__/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit", + "__REDDIT__/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches" + ] + } + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 564, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "live_a_life", + "num": 3 + }, + "intent": "create a repository named live_a_life that includes a README file with the links to the most active 3 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + 
"program_html": [ + { + "url": "__GITLAB__/byteblaze/live_a_life/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "__REDDIT__/f/DIY/118903/separate-glued-plastic-parts", + "__REDDIT__/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess", + "__REDDIT__/f/DIY/118935/basement-bulkhead-soffit-wall-framing" + ] + } + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 565, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "TODO", + "num": 10 + }, + "intent": "create a repository named TODO that includes a README file with the links to the most active 10 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/TODO/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "__REDDIT__/f/DIY/118903/separate-glued-plastic-parts", + "__REDDIT__/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess", + "__REDDIT__/f/DIY/118935/basement-bulkhead-soffit-wall-framing", + "__REDDIT__/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit", + "__REDDIT__/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches", + "__REDDIT__/f/DIY/118931/afci-outlet-question", + "__REDDIT__/f/DIY/118824/teflon-tape-to-attach-washing-machine-drain-hose-to-pipe", + "__REDDIT__/f/DIY/118866/paver-base-for-shed", + "__REDDIT__/f/DIY/118820/ways-to-locate-our-buried-electrical-service", + "__REDDIT__/f/DIY/118836/how-to-eliminate-transitions-for-disability-mobility-reasons" + ] + } + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + 
"gitlab", + "reddit" + ], + "task_id": 566, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "Do it myself", + "num": 8 + }, + "intent": "create a repository named Do it myself that includes a README file with the links to the most active 8 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/Do-it-myself/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "__REDDIT__/f/DIY/118903/separate-glued-plastic-parts", + "__REDDIT__/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess", + "__REDDIT__/f/DIY/118935/basement-bulkhead-soffit-wall-framing", + "__REDDIT__/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit", + "__REDDIT__/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches", + "__REDDIT__/f/DIY/118931/afci-outlet-question", + "__REDDIT__/f/DIY/118824/teflon-tape-to-attach-washing-machine-drain-hose-to-pipe", + "__REDDIT__/f/DIY/118866/paver-base-for-shed" + ] + } + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 567, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo", + "instantiation_dict": { + "collaborator_account_list": "Jakub Klinkovsk\u00fd and Beno\u00eet Blanchon", + "repo": "gimmiethat.space" + }, + "intent": "Invite Jakub Klinkovsk\u00fd and Beno\u00eet Blanchon as collaborator to gimmiethat.space repo", + "require_reset": false, + "eval": { + "eval_types": [ 
+ "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@bblanchon" + ] + } + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 568, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo", + "instantiation_dict": { + "collaborator_account_list": "Abishek and Vinta", + "repo": "a11yproject.com" + }, + "intent": "Invite Abishek and Vinta as collaborator to a11yproject.com repo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/a11yproject/a11yproject.com/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@abisubramanya27", + "@vinta" + ] + } + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 569, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo", + "instantiation_dict": { + "collaborator_account_list": "Beno\u00eet and Abishek", + "repo": "my HTML5 markup extention" + }, + "intent": "Invite Beno\u00eet and Abishek as collaborator to my HTML5 markup extention repo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/accessible-html-content-patterns/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@bblanchon", + "@abisubramanya27" + ] + } + } + ] + }, + 
"intent_template_id": 293 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 570, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo", + "instantiation_dict": { + "collaborator_account_list": "Jakub K, Alex Dills, Alex Hutnik and Beno\u00eet Blanchon", + "repo": "my time tracking tool project" + }, + "intent": "Invite Jakub K, Alex Dills, Alex Hutnik and Beno\u00eet Blanchon as collaborator to my time tracking tool project repo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/timeit/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@V13Axel", + "@alexhutnik", + "@bblanchon" + ] + } + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + "shopping" + ], + "task_id": 571, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "231 Willow Way, Suite 100, Chicago, IL, 60601" + }, + "intent": "I recently moved, my address is 231 Willow Way, Suite 100, Chicago, IL, 60601, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/address", + "locator": "document.querySelector(\".box.box-address-billing > .box-content\").outerText", + "required_contents": { + "must_include": [ + "231 Willow Way", + "Suite 100", + "Chicago, Illinois, 60601" + ] + } + }, + { + "url": "__SHOPPING__/customer/address", + 
"locator": "document.querySelector(\".box.box-address-shipping > .box-content\").outerText", + "required_contents": { + "must_include": [ + "231 Willow Way", + "Suite 100", + "Chicago, Illinois, 60601" + ] + } + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "shopping" + ], + "task_id": 572, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "654 Aspen Road, House #3, Boston, MA, 02110" + }, + "intent": "I recently moved, my address is 654 Aspen Road, House #3, Boston, MA, 02110, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/address", + "locator": "document.querySelector(\".box.box-address-billing > .box-content\").outerText", + "required_contents": { + "must_include": [ + "654 Aspen Road", + "House #3", + "Boston, Massachusetts, 02110" + ] + } + }, + { + "url": "__SHOPPING__/customer/address", + "locator": "document.querySelector(\".box.box-address-shipping > .box-content\").outerText", + "required_contents": { + "must_include": [ + "654 Aspen Road", + "House #3", + "Boston, Massachusetts, 02110" + ] + } + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "shopping" + ], + "task_id": 573, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "987 Sycamore Circle, Philadelphia, PA, 19102" + }, + "intent": "I recently moved, my address is 987 Sycamore Circle, Philadelphia, PA, 
19102, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/address", + "locator": "document.querySelector(\".box.box-address-shipping > .box-content\").outerText", + "required_contents": { + "must_include": [ + "987 Sycamore Circle", + "Philadelphia, Pennsylvania, 19102" + ] + } + }, + { + "url": "__SHOPPING__/customer/address", + "locator": "document.querySelector(\".box.box-address-billing > .box-content\").outerText", + "required_contents": { + "must_include": [ + "987 Sycamore Circle", + "Philadelphia, Pennsylvania, 19102" + ] + } + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "shopping" + ], + "task_id": 574, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "111 Magnolia Path, Atlanta, GA, 30303" + }, + "intent": "I recently moved, my address is 111 Magnolia Path, Atlanta, GA, 30303, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/address", + "locator": "document.querySelector(\".box.box-address-shipping > .box-content\").outerText", + "required_contents": { + "must_include": [ + "111 Magnolia Path", + "Atlanta, Georgia, 30303" + ] + } + }, + { + "url": "__SHOPPING__/customer/address", + "locator": "document.querySelector(\".box.box-address-billing > .box-content\").outerText", + "required_contents": { + "must_include": [ + "111 Magnolia Path", + "Atlanta, Georgia, 30303" + ] + } + } + ] + }, + "intent_template_id": 165 + }, + { + 
"sites": [ + "shopping" + ], + "task_id": 575, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "222 Redwood Rise, Suite 300, Seattle, WA, 98101" + }, + "intent": "I recently moved, my address is 222 Redwood Rise, Suite 300, Seattle, WA, 98101, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/address", + "locator": "document.querySelector(\".box.box-address-shipping > .box-content\").outerText", + "required_contents": { + "must_include": [ + "222 Redwood Rise", + "Suite 300", + "Seattle, Washington, 98101" + ] + } + }, + { + "url": "__SHOPPING__/customer/address", + "locator": "document.querySelector(\".box.box-address-billing > .box-content\").outerText", + "required_contents": { + "must_include": [ + "222 Redwood Rise", + "Suite 300", + "Seattle, Washington, 98101" + ] + } + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 576, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Add the following users to repo {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "a11y-webring.club", + "role": "developer", + "user_list": [ + "abisubramanya27", + "lahwaacz" + ] + }, + "intent": "Add the following users to repo a11y-webring.club as developer: ['abisubramanya27', 'lahwaacz']", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"__GITLAB__/byteblaze/a11y-webring.club/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'abisubramanya27')", + "required_contents": { + "must_include": [ + "Developer" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/a11y-webring.club/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'lahwaacz')", + "required_contents": { + "must_include": [ + "Developer" + ] + } + } + ] + }, + "intent_template_id": 351 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 577, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Add the following users to my {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "GitHub timeline item management extension", + "role": "maintainer", + "user_list": [ + "abisubramanya27", + "lahwaacz" + ] + }, + "intent": "Add the following users to my GitHub timeline item management extension as maintainer: ['abisubramanya27', 'lahwaacz']", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/remove-board-movement-events-from-the-github-issue-timeline/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'abisubramanya27')", + "required_contents": { + "must_include": [ + "Maintainer" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/remove-board-movement-events-from-the-github-issue-timeline/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'lahwaacz')", + "required_contents": { + "must_include": [ + "Maintainer" + ] + } + } + ] + }, + "intent_template_id": 351 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 578, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Add the following users to repo 
{{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "millennials-to-snake-people", + "role": "reporter", + "user_list": [ + "yjlou", + "a11yproject" + ] + }, + "intent": "Add the following users to repo millennials-to-snake-people as reporter: ['yjlou', 'a11yproject']", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/millennials-to-snake-people/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'yjlou')", + "required_contents": { + "must_include": [ + "Reporter" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/millennials-to-snake-people/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'a11yproject')", + "required_contents": { + "must_include": [ + "Reporter" + ] + } + } + ] + }, + "intent_template_id": 351 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 579, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Add the following users to my {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "time tracking tool", + "role": "guest", + "user_list": [ + "yjlou" + ] + }, + "intent": "Add the following users to my time tracking tool as guest: ['yjlou']", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/timeit/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'yjlou')", + "required_contents": { + "must_include": [ + "Guest" + ] + } + } + ] + }, + "intent_template_id": 351 + }, + { + "sites": [ + "reddit" + ], + "task_id": 580, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": 
"Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "sci_fi", + "description": "A wild place for sci-fi enthusiasts", + "sidebar_list": [ + "New", + "Classic", + "Movies", + "Post my novel", + "Random" + ] + }, + "intent": "Create a new forum named sci_fi, with a description of A wild place for sci-fi enthusiasts, and include ['New', 'Classic', 'Movies', 'Post my novel', 'Random'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/sci_fi/edit", + "locator": "document.querySelector(\"#forum_description\").value", + "required_contents": { + "must_include": [ + "A wild place for sci-fi enthusiasts" + ] + } + }, + { + "url": "__REDDIT__/f/sci_fi/edit", + "locator": "document.querySelector(\"#forum_sidebar\").value", + "required_contents": { + "must_include": [ + "New", + "Classic", + "Movies", + "Post my novel", + "Random" + ] + } + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 581, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "cmu_lti", + "description": "Language Technologies Institute at Carnegie Mellon University", + "sidebar_list": [ + "announcement", + "paper", + "alumni" + ] + }, + "intent": "Create a new forum named cmu_lti, with a description of Language Technologies Institute at Carnegie Mellon University, and include ['announcement', 'paper', 'alumni'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + 
{ + "url": "__REDDIT__/f/cmu_lti/edit", + "locator": "document.querySelector(\"#forum_description\").value", + "required_contents": { + "must_include": [ + "Language Technologies Institute at Carnegie Mellon University" + ] + } + }, + { + "url": "__REDDIT__/f/cmu_lti/edit", + "locator": "document.querySelector(\"#forum_sidebar\").value", + "required_contents": { + "must_include": [ + "announcement", + "paper", + "alumni" + ] + } + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 582, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "Cyberpunk", + "description": "Welcome to the future", + "sidebar_list": [ + "Games", + "Books", + "Movies", + "Future" + ] + }, + "intent": "Create a new forum named Cyberpunk, with a description of Welcome to the future, and include ['Games', 'Books', 'Movies', 'Future'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/Cyberpunk/edit", + "locator": "document.querySelector(\"#forum_description\").value", + "required_contents": { + "must_include": [ + "Welcome to the future" + ] + } + }, + { + "url": "__REDDIT__/f/Cyberpunk/edit", + "locator": "document.querySelector(\"#forum_sidebar\").value", + "required_contents": { + "must_include": [ + "Games", + "Books", + "Movies", + "Future" + ] + } + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 583, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and 
include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "PlantsForCatParents", + "description": "Cat parents & plan lovers", + "sidebar_list": [ + "Cat friendly", + "Local vendors", + "Promotion", + "Toxic plants!" + ] + }, + "intent": "Create a new forum named PlantsForCatParents, with a description of Cat parents & plan lovers, and include ['Cat friendly', 'Local vendors', 'Promotion', 'Toxic plants!'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/PlantsForCatParents/edit", + "locator": "document.querySelector(\"#forum_description\").value", + "required_contents": { + "must_include": [ + "Cat parents & plan lovers" + ] + } + }, + { + "url": "__REDDIT__/f/PlantsForCatParents/edit", + "locator": "document.querySelector(\"#forum_sidebar\").value", + "required_contents": { + "must_include": [ + "Cat friendly", + "Local vendors", + "Promotion", + "Toxic plants!" 
+ ] + } + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 584, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "Karaoke", + "description": "Place for Karaoke lovers", + "sidebar_list": [ + "devices", + "setup" + ] + }, + "intent": "Create a new forum named Karaoke, with a description of Place for Karaoke lovers, and include ['devices', 'setup'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/Karaoke", + "locator": "document.querySelector(\"#forum_description\").value", + "required_contents": { + "must_include": [ + "Place for Karaoke lovers" + ] + } + }, + { + "url": "__REDDIT__/f/Karaoke", + "locator": "document.querySelector(\"#forum_sidebar\").value", + "required_contents": { + "must_include": [ + "devices", + "setup" + ] + } + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "shopping" + ], + "task_id": 585, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "floor lamp", + "num_star": 5, + "nickname": "Emma Lopez" + }, + "intent": "Rate my recent purchase of floor lamp with 5 stars, using my nickname Emma Lopez?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B00J8RZL7I')", + "required_contents": { + 
"must_include": [ + "100" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B00J8RZL7I')", + "required_contents": { + "must_include": [ + "Emma Lopez" + ] + } + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "shopping" + ], + "task_id": 586, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "Jiffy Corn Muffin Cornbread Mix", + "num_star": 4, + "nickname": "ShoppingEmma" + }, + "intent": "Rate my recent purchase of Jiffy Corn Muffin Cornbread Mix with 4 stars, using my nickname ShoppingEmma?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B07HZB38XH')", + "required_contents": { + "must_include": [ + "80" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B07HZB38XH')", + "required_contents": { + "must_include": [ + "ShoppingEmma" + ] + } + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "shopping" + ], + "task_id": 587, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "PS3 Remote Controllers", + "num_star": 3, + "nickname": "GamingEmma" + }, + "intent": "Rate my recent purchase of PS3 Remote Controllers with 3 stars, using my nickname GamingEmma?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + 
"url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B0041MSF2S')", + "required_contents": { + "must_include": [ + "60" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B0041MSF2S')", + "required_contents": { + "must_include": [ + "GamingEmma" + ] + } + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "shopping" + ], + "task_id": 588, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "Foundation For Mattress With Frame Set", + "num_star": 1, + "nickname": "ShoppingEmma" + }, + "intent": "Rate my recent purchase of Foundation For Mattress With Frame Set with 1 stars, using my nickname ShoppingEmma?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B07DFJ5XKH')", + "required_contents": { + "must_include": [ + "20" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B07DFJ5XKH')", + "required_contents": { + "must_include": [ + "ShoppingEmma" + ] + } + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "shopping" + ], + "task_id": 589, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "Mini Wireless Bluetooth Speaker", + "num_star": 2, + "nickname": "SimpleEmma" + }, + "intent": "Rate my recent purchase of Mini Wireless Bluetooth Speaker with 2 stars, using my nickname SimpleEmma?", + 
"require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B09P7BFL4H')", + "required_contents": { + "must_include": [ + "40" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B09P7BFL4H')", + "required_contents": { + "must_include": [ + "SimpleEmma" + ] + } + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 590, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "event of product launch", + "start_date": "1/16/2023", + "end_date": "1/30/2023" + }, + "intent": "Create a milestone for the upcoming event of product launch starting on 1/16/2023 and ending on 1/30/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": { + "must_include": [ + "product launch" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": { + "must_include": [ + "Jan 16, 2030" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": { + "must_include": [ + "Jan 30, 2030" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 591, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": 
"__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "practice of collective code review", + "start_date": "1/16/2023", + "end_date": "in 20 days" + }, + "intent": "Create a milestone for the upcoming practice of collective code review starting on 1/16/2023 and ending on in 20 days", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": { + "must_include": [ + "code review" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": { + "must_include": [ + "Jan 16, 2030" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": { + "must_include": [ + "Feb 5, 2030" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 592, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "task of cleaning sensitive information", + "start_date": "2/16/2023", + "end_date": "in 20 days" + }, + "intent": "Create a milestone for the upcoming task of cleaning sensitive information starting on 2/16/2023 and ending on in 20 days", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/milestones", + "program_html": 
[ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": { + "must_include": [ + "sensitive information" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": { + "must_include": [ + "Feb 16, 2030" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": { + "must_include": [ + "Mar 8, 2030" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 593, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/dotfiles", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "task of merging all branches to main", + "start_date": "March 15, 2044", + "end_date": "March 30, 2044" + }, + "intent": "Create a milestone for the upcoming task of merging all branches to main starting on March 15, 2044 and ending on March 30, 2044", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/dotfiles/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": { + "must_include": [ + "all branches to main" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": { + "must_include": [ + "Mar 15, 2044" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": { + "must_include": [ + "Mar 30, 2044" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "gitlab" + ], + 
"task_id": 594, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/dotfiles", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "task of adding a new branch for zsh comprehensive support", + "start_date": "5/1/2044", + "end_date": "in 20 days" + }, + "intent": "Create a milestone for the upcoming task of adding a new branch for zsh comprehensive support starting on 5/1/2044 and ending on in 20 days", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/dotfiles/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": { + "must_include": [ + "zsh comprehensive support" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": { + "must_include": [ + "May 1, 2044" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": { + "must_include": [ + "May 21, 2044" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "reddit" + ], + "task_id": 595, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "space" + }, + "intent": "Open the thread of a trending post on the forum \"space\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/space", + "program_html": [ + { + "url": 
"__REDDIT__/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": { + "must_include": [ + "space" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 596, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "books" + }, + "intent": "Open the thread of a trending post on the forum \"books\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "__REDDIT__/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": { + "must_include": [ + "books" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 597, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "consoles" + }, + "intent": "Open the thread of a trending post on the forum \"consoles\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/consoles", + "program_html": [ + { + "url": "__REDDIT__/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": { + "must_include": [ + "consoles" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 598, + "require_login": 
true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "pittsburgh" + }, + "intent": "Open the thread of a trending post on the forum \"pittsburgh\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pittsburgh", + "program_html": [ + { + "url": "__REDDIT__/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": { + "must_include": [ + "pittsburgh" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 599, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "machine learning" + }, + "intent": "Open the thread of a trending post on the forum \"machine learning\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/MachineLearning", + "program_html": [ + { + "url": "__REDDIT__/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": { + "must_include": [ + "MachineLearning" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 600, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + 
"question": "what is the recommended console to buy these days" + }, + "intent": "Post my question, \"what is the recommended console to buy these days\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/consoles", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "what is the recommended console to buy these days" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + "task_id": 601, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "is car necessary in NYC" + }, + "intent": "Post my question, \"is car necessary in NYC\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/nyc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "is car necessary in NYC" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + "task_id": 602, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "places for new drivers to learn 
driving in pittsburgh" + }, + "intent": "Post my question, \"places for new drivers to learn driving in pittsburgh\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pittsburgh", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "places for new drivers to learn driving in pittsburgh" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + "task_id": 603, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "safe and budge apartment to live in nyc" + }, + "intent": "Post my question, \"safe and budge apartment to live in nyc\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/nyc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "safe and budge apartment to live in nyc" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + "task_id": 604, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "what is the SOTA 
web navigation agent repo" + }, + "intent": "Post my question, \"what is the SOTA web navigation agent repo\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/deeplearning |OR| __REDDIT__/f/MachineLearning |OR| __REDDIT__/f/singularity", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "what is the SOTA web navigation agent repo" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + "task_id": 605, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "gaming consoles", + "question": "what is the recommended console to buy these days" + }, + "intent": "Find a subreddit focused on topics related to gaming consoles, and post my question, \"what is the recommended console to buy these days\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/consoles", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "what is the recommended console to buy these days" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 606, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + 
"intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "NYC", + "question": "is car necessary" + }, + "intent": "Find a subreddit focused on topics related to NYC, and post my question, \"is car necessary\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/nyc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "is car necessary" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 607, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "city Pittsburgh", + "question": "places for new drivers to learn driving" + }, + "intent": "Find a subreddit focused on topics related to city Pittsburgh, and post my question, \"places for new drivers to learn driving\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pittsburgh", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "places for new drivers to learn driving" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 608, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + 
"geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "city lives in DMV area", + "question": "safe and budge apartment to live" + }, + "intent": "Find a subreddit focused on topics related to city lives in DMV area, and post my question, \"safe and budge apartment to live\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/washington |OR| __REDDIT__/f/washingtondc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "safe and budge apartment to live" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 609, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "ML, DL, NLP", + "question": "what is the SOTA web navigation agent repo" + }, + "intent": "Find a subreddit focused on topics related to ML, DL, NLP, and post my question, \"what is the SOTA web navigation agent repo\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/deeplearning |OR| __REDDIT__/f/MachineLearning |OR| __REDDIT__/f/singularity", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "what is the SOTA web navigation agent repo" + ] + } + } + ], + "url_note": 
"GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 610, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "To Kill a Mockingbird by Harper Lee", + "content": "good book!" + }, + "intent": "Post a review of my recent reading \"To Kill a Mockingbird by Harper Lee\" in the r/books with my comment \"good book!\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "To Kill a Mockingbird by Harper Lee", + "good book!" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 611, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "Harry Potter", + "content": "Wonderful journey" + }, + "intent": "Post a review of my recent reading \"Harry Potter\" in the r/books with my comment \"Wonderful journey\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "Harry Potter", + "Wonderful journey" + ] + } + } 
+ ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 612, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "big little lies", + "content": "can't stop it" + }, + "intent": "Post a review of my recent reading \"big little lies\" in the r/books with my comment \"can't stop it\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "big little lies", + "can't stop it" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 613, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "Love story", + "content": "I cried" + }, + "intent": "Post a review of my recent reading \"Love story\" in the r/books with my comment \"I cried\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "Love story", + "I cried" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + 
}, + { + "sites": [ + "reddit" + ], + "task_id": 614, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "Gone with the wind", + "content": "It's a book with history" + }, + "intent": "Post a review of my recent reading \"Gone with the wind\" in the r/books with my comment \"It's a book with history\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "Gone with the wind", + "It's a book with history" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 615, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "Bald Eagle", + "subreddit": "earthporn" + }, + "intent": "Re-post the image of Bald Eagle in this page to earthporn subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/earthporn", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "from /f/pics" + ] + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": 
"[...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "b02113033af32feae9ff147dbbe3764039368d67d193885bd04e65c2e6beea9c.jpg" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 616, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "Thanksgiving turkey", + "subreddit": "funny" + }, + "intent": "Re-post the image of Thanksgiving turkey in this page to funny subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/funny", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "from /f/pics" + ] + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "[...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "2e4fa0a328e653a97a7d07046291c298ef5b4e0d0c73a287f317ca86a8e8685f.jpg" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 617, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", 
+ "instantiation_dict": { + "content": "Firework", + "subreddit": "earthporn" + }, + "intent": "Re-post the image of Firework in this page to earthporn subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/earthporn", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "from /f/pics" + ] + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "[...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "92411be6af4e9ad5ccd3ccbaa01c10457bb00e704e99c58dd430de1a958307fd.jpg" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 618, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "Wife's costume", + "subreddit": "funny" + }, + "intent": "Re-post the image of Wife's costume in this page to funny subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/funny", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "from /f/pics" + ] + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": 
"[...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "6bfbf1280d28d84a9261695f0cac5a90addaaff1174807a2b381fdc159f3ed00.jpg" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 619, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "costume contest", + "subreddit": "funny" + }, + "intent": "Re-post the image of costume contest in this page to funny subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/funny", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "from /f/pics" + ] + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "[...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "bd8bc5f4c846aac4df08626faa3a34a7d47c8f3bdd92bf615a54afd939f063a7.jpg" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 620, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "deal with 
long-distance relationships" + }, + "intent": "Ask for advice about deal with long-distance relationships in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "long", + "distance", + "relation" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 621, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "cheat" + }, + "intent": "Ask for advice about cheat in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "cheat" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 622, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "sexual harassment" + }, + "intent": "Ask for advice about sexual harassment in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + 
"reference_url": "__REDDIT__/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "sexual", + "harassment" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 623, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "gift for birthday" + }, + "intent": "Ask for advice about gift for birthday in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "gift", + "birthday" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 624, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "break-up remedy" + }, + "intent": "Ask for advice about break-up remedy in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + 
"must_include": [ + "break", + "remedy" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 625, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "the effectiveness of online learning" + }, + "intent": "Create a discussion post about \"the effectiveness of online learning\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/machinelearning |OR| __REDDIT__/f/deeplearning", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "your opinion", + "the effectiveness of online learning" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 626, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "Iphone 14" + }, + "intent": "Create a discussion post about \"Iphone 14\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/iphone", + 
"program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "your opinion", + "Iphone 14" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 627, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "Harry Potter movie series" + }, + "intent": "Create a discussion post about \"Harry Potter movie series\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/movies", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "your opinion", + "Harry Potter movie series" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 628, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "long distance relationship" + }, + "intent": "Create a discussion post about \"long distance relationship\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + 
"eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "your opinion", + "long distance relationship" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 629, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "Fun thing to do in Pittsburgh" + }, + "intent": "Create a discussion post about \"Fun thing to do in Pittsburgh\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pittsburgh", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "your opinion", + "Fun thing to do in Pittsburgh" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 630, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "noise-cancelling headphones", + "price": "$200", + "subreddit": "r/headphones" + }, + "intent": 
"Ask for product recommendations for noise-cancelling headphones within a budget of $200 in r/headphones", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/headphones", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "200", + "noise-cancelling", + "headphone" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 631, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "running shoes", + "price": "$100", + "subreddit": "r/sports" + }, + "intent": "Ask for product recommendations for running shoes within a budget of $100 in r/sports", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/sports", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "100", + "running", + "shoes" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 632, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "running shoes", + "price": "$500", + "subreddit": "r/sports" + }, + "intent": "Ask for 
product recommendations for running shoes within a budget of $500 in r/sports", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/sports", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "500", + "running", + "shoes" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 633, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "running pants", + "price": "$500", + "subreddit": "r/sports" + }, + "intent": "Ask for product recommendations for running pants within a budget of $500 in r/sports", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/sports", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "500", + "running", + "pants" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 634, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "used iphone", + "price": "$1000", + "subreddit": "r/iphone" + }, + "intent": "Ask for product recommendations for used iphone within a 
budget of $1000 in r/iphone", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/iphone", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "1000", + "used iphone" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 635, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "noise-cancelling headphones", + "price": "$200" + }, + "intent": "Post in the most appropriate subreddit and ask for recommendations for noise-cancelling headphones products within a budget of $200", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/headphones", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "200", + "noise-cancelling", + "headphone" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 636, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "DIY toolkit", + "price": "$100" + }, + "intent": "Post in the most appropriate 
subreddit and ask for recommendations for DIY toolkit products within a budget of $100", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/DIY", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "100", + "DIY", + "toolkit" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 637, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "sony headphones", + "price": "$500" + }, + "intent": "Post in the most appropriate subreddit and ask for recommendations for sony headphones products within a budget of $500", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/headphones", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "500", + "sony headphone" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 638, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "must-have product in my life", + "price": "$30" + }, + "intent": 
"Post in the most appropriate subreddit and ask for recommendations for must-have product in my life products within a budget of $30", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/BuyItForLife", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "30", + "must-have", + "product", + "life" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 639, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "used iphone", + "price": "$1000" + }, + "intent": "Post in the most appropriate subreddit and ask for recommendations for used iphone products within a budget of $1000", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/iphone", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "1000", + "used iphone" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 640, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "book reading", + 
"date": "March 15th", + "subreddit": "r/books" + }, + "intent": "Post a notice on a virtual meetup for book reading enthusiasts on March 15th in the r/books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "book reading", + "March 15th", + "virtual meetup" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 641, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "Harry Potter", + "date": "July 8th", + "subreddit": "books" + }, + "intent": "Post a notice on a virtual meetup for Harry Potter enthusiasts on July 8th in the books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "Harry Potter", + "July 8th", + "virtual meetup" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 642, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + 
"instantiation_dict": { + "interest": "Big little lies", + "date": "Sep 10th", + "subreddit": "books" + }, + "intent": "Post a notice on a virtual meetup for Big little lies enthusiasts on Sep 10th in the books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "Big little lies", + "Sep 10th", + "virtual meetup" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 643, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "racing cars", + "date": "Oct 21st", + "subreddit": "nyc" + }, + "intent": "Post a notice on a virtual meetup for racing cars enthusiasts on Oct 21st in the nyc subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/nyc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "racing cars", + "Oct 21st", + "virtual meetup" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 644, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in 
the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "Tears of Kingdom", + "date": "Dec 15th", + "subreddit": "games" + }, + "intent": "Post a notice on a virtual meetup for Tears of Kingdom enthusiasts on Dec 15th in the games subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/games", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "Tears of Kingdom", + "Dec 15th", + "virtual meetup" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 645, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in {{subreddit}} subreddit about what could machine learning help the correpong field.", + "instantiation_dict": { + "subreddit": "books" + }, + "intent": "Post in books subreddit about what could machine learning help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "machine learning", + "help" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 646, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in {{subreddit}} subreddit about what could midjourney help the correpong field.", + "instantiation_dict": { + "subreddit": "DIY" 
+ }, + "intent": "Post in DIY subreddit about what could midjourney help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/diy", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "midjourney", + "help" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 647, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in {{subreddit}} forum about what could open-source LLMs help the correpong field.", + "instantiation_dict": { + "subreddit": "technology" + }, + "intent": "Post in technology forum about what could open-source LLMs help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/technology", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "open-source LLMs", + "help" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 648, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in {{subreddit}} forum about what could large language models help the correpong field.", + "instantiation_dict": { + "subreddit": "dataisbeautiful" + }, + "intent": "Post in dataisbeautiful forum about what could large language models help the correpong field.", + "require_reset": false, + "eval": { + 
"eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/dataisbeautiful", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "large language models", + "help" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 649, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in {{subreddit}} subreddit about what could diffusion model help the correpong field.", + "instantiation_dict": { + "subreddit": "history" + }, + "intent": "Post in history subreddit about what could diffusion model help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/history", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "diffusion model", + "help" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 650, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/books/59421/friendly-reminder-bookshop-org-exists", + "geolocation": null, + "intent_template": "Reply to {{position_description}} with my comment \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the post", + "content_description": "I am a big fan of the bookorg" + }, + "intent": "Reply to the post with my comment \"I am a big fan of the bookorg\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + 
"reference_answers": null, + "reference_url": "__REDDIT__/f/books/59421/friendly-reminder-bookshop-org-exists", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.comment__body').outerText", + "required_contents": { + "exact_match": "I am a big fan of the bookorg" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 23 + }, + { + "sites": [ + "reddit" + ], + "task_id": 651, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pittsburgh/45899/driving-in-pittsburgh-summed-up-by-one-traffic-sign", + "geolocation": null, + "intent_template": "Reply to {{position_description}} with my comment \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the post", + "content_description": "Yeah, pittsburgh traffic, you know..." + }, + "intent": "Reply to the post with my comment \"Yeah, pittsburgh traffic, you know...\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pittsburgh/45899/driving-in-pittsburgh-summed-up-by-one-traffic-sign", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.comment__body').outerText", + "required_contents": { + "exact_match": "Yeah, pittsburgh traffic, you know..." + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 23 + }, + { + "sites": [ + "reddit" + ], + "task_id": 652, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/technology/134852/ai-experts-disown-musk-backed-campaign-citing-their-research", + "geolocation": null, + "intent_template": "Reply to {{position_description}} with my comment \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the post", + "content_description": "???" 
+ }, + "intent": "Reply to the post with my comment \"???\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/technology/134852/ai-experts-disown-musk-backed-campaign-citing-their-research", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.comment__body').outerText", + "required_contents": { + "exact_match": "???" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 23 + }, + { + "sites": [ + "shopping" + ], + "task_id": 653, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "phone screen protector", + "order_id": "000000180" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the phone screen protector I bought, stating that it broke after just three days of use. Also, ensure to include the order number #000000180 and the product SKU. 
Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "000000180", + "B087QJN9W1" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "shopping" + ], + "task_id": 654, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "bluetooth speaker", + "order_id": "161" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the bluetooth speaker I bought, stating that it broke after just three days of use. Also, ensure to include the order number #161 and the product SKU. 
Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "161", + "B09P7BFL4H" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "shopping" + ], + "task_id": 655, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "iphone case", + "order_id": "180" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the iphone case I bought, stating that it broke after just three days of use. Also, ensure to include the order number #180 and the product SKU. 
Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "180", + "B087QJN9W1" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "shopping" + ], + "task_id": 656, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "remote controller", + "order_id": "180" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the remote controller I bought, stating that it broke after just three days of use. Also, ensure to include the order number #180 and the product SKU. 
Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "it broke after three days of use", + "180", + "B0041MSF2S" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "shopping" + ], + "task_id": 657, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "speaker", + "order_id": "148" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the speaker I bought, stating that it broke after just three days of use. Also, ensure to include the order number #148 and the product SKU. 
Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "refund", + "broke after three days of use", + "148", + "B003FVW3VA" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 658, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "a11yproject", + "issue": "401 bad gateway", + "account": "Roshanjossey", + "due": "the end of 2030" + }, + "intent": "Create an issue in a11yproject repo with title \"401 bad gateway\". Assign the issue to Roshanjossey. 
Set due date to be the end of 2030", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[data-qa-selector=\"title_content\"]').outerText", + "required_contents": { + "exact_match": "401 bad gateway" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": { + "must_include": [ + "Dec 31, 2030" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": { + "must_include": [ + "Roshan Jossey" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 659, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "empathy-prompts", + "issue": "Integrating LLMs for better prompts", + "account": "Roshanjossey", + "due": "the beginning of Q2 2033" + }, + "intent": "Create an issue in empathy-prompts repo with title \"Integrating LLMs for better prompts\". Assign the issue to Roshanjossey. 
Set due date to be the beginning of Q2 2033", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/empathy-prompts/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[data-qa-selector=\"title_content\"]').outerText", + "required_contents": { + "exact_match": "Integrating LLMs for better prompts" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": { + "must_include": [ + "Apr 1, 2033" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": { + "must_include": [ + "Roshan Jossey" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 660, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "dotfiles", + "issue": "add support for oh-my-zsh", + "account": "Abishek", + "due": "July 18 2033" + }, + "intent": "Create an issue in dotfiles repo with title \"add support for oh-my-zsh\". Assign the issue to Abishek. 
Set due date to be July 18 2033", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/dotfiles/-/tree/main", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[data-qa-selector=\"title_content\"]').outerText", + "required_contents": { + "exact_match": "add support for oh-my-zsh" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": { + "must_include": [ + "Jul 18, 2033" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": { + "must_include": [ + "Abishek S" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 661, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "ChatGPT", + "issue": "report the issue of connection refused" + }, + "intent": "Open an issue to report the issue of connection refused in ChatGPT.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/convexegg/chatgpt/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "connection refused" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 662, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": 
"aem-hacker", + "issue": "report experiencing \"OSError: [Errno 98] Address already in use\" during executions" + }, + "intent": "Open an issue to report experiencing \"OSError: [Errno 98] Address already in use\" during executions in aem-hacker.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/0ang3el/aem-hacker/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "OSError: [Errno 98] Address already in use" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 663, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "metaseq", + "issue": "ask their plan on supporting Llama and other llama family models" + }, + "intent": "Open an issue to ask their plan on supporting Llama and other llama family models in metaseq.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/root/metaseq/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "llama" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 664, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "awesome-python", + "issue": "ask their plans on adding Python 3.11 related 
resources" + }, + "intent": "Open an issue to ask their plans on adding Python 3.11 related resources in awesome-python.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/vinta/awesome-python/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "Python 3.11" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 665, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "a11y-syntax-highlighting", + "issue": "request adding support for MT theme editor" + }, + "intent": "Open an issue to request adding support for MT theme editor in a11y-syntax-highlighting.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/a11y-syntax-highlighting/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "MT theme editor" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 666, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Submit a request to merge {{source_branch}} branch into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "dialog-component", + "target_branch": "dialog", + "reviewer": "Carol" + }, + "intent": "Submit a request to merge 
dialog-component branch into dialog branch, assign Carol as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": { + "exact_match": "dialog" + } + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": { + "exact_match": "dialog-component" + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": { + "must_include": [ + "Caroline Stewart" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 667, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "dialog-component", + "target_branch": "bump-doctocat", + "reviewer": "primer" + }, + "intent": "Submit a merge request for dialog-component branch to be merged into bump-doctocat branch, assign primer as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": { + "exact_match": "bump-doctocat" + } + }, + { + "url": "last", + "locator": 
"document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": { + "exact_match": "dialog-component" + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": { + "must_include": [ + "Primer" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 668, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "a11yproject.com/redesign", + "target_branch": "master", + "reviewer": "Justin Armstrong" + }, + "intent": "Submit a merge request for a11yproject.com/redesign branch to be merged into master branch, assign Justin Armstrong as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": { + "exact_match": "redesign" + } + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": { + "exact_match": "main" + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": { + "must_include": [ + "Justin Armstrong" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 669, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": 
"__GITLAB__/byteblaze/solarized-prism-theme", + "geolocation": null, + "intent_template": "Open a new issue to discuss the implementation of {{feature}}", + "instantiation_dict": { + "feature": "dark mode" + }, + "intent": "Open a new issue to discuss the implementation of dark mode", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/solarized-prism-theme/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "implementation", + "dark mode" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 337 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 670, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/dotfiles", + "geolocation": null, + "intent_template": "Open a new issue to discuss the implementation of {{feature}}", + "instantiation_dict": { + "feature": "default plugins for .zsh" + }, + "intent": "Open a new issue to discuss the implementation of default plugins for .zsh", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/dotfiles/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": { + "must_include": [ + "implementation", + "default plugins", + "zsh" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 337 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 671, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in 
the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "Sony Computer Entertainment VR", + "rating": "2 stars and less" + }, + "intent": "Gather the titles of Sony Computer Entertainment VR reviews with 2 stars and less rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Sony Computer Entertainment VR\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__title').outerText", + "required_contents": { + "exact_match": "real user feedback on Sony Computer Entertainment VR" + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "must_include": [ + "didn't last a year without issues", + "Disappointing. Didn't last long before it stopped powering on and needed to be sent in for repair.", + "Received used items!!" 
+ ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 672, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "Nintendo Switch Fortnite Wildcat Console EU", + "rating": "3 stars and less" + }, + "intent": "Gather the titles of Nintendo Switch Fortnite Wildcat Console EU reviews with 3 stars and less rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Nintendo Switch Fortnite Wildcat Console EU\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__title').outerText", + "required_contents": { + "exact_match": "real user feedback on Nintendo Switch Fortnite Wildcat Console EU" + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "must_include": [ + "EU charger and wild cat card doesn\u2019t even work!", + "REFUND REJECTED", + "Charging port not compatible", + "not compatible in the US", + "Wildcard Bonus Credits Not Redeemable!", + "Code not available!!" 
+ ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 673, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "Racing Wheel Overdrive for Xbox X", + "rating": "1 star" + }, + "intent": "Gather the titles of Racing Wheel Overdrive for Xbox X reviews with 1 star rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Racing Wheel Overdrive for Xbox X\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__title').outerText", + "required_contents": { + "exact_match": "real user feedback on Racing Wheel Overdrive for Xbox X" + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "must_include": [ + "Unable to set neutral steering", + "Doesn\u2019t work with PC", + "Crazy problems in automatic mode", + "pedals stopped working", + "Only works with certain games" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 674, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title 
\"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "Doc and Pies Arcade Factory Cocktail Arcade Machine", + "rating": "3 stars and less" + }, + "intent": "Gather the titles of Doc and Pies Arcade Factory Cocktail Arcade Machine reviews with 3 stars and less rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Doc and Pies Arcade Factory Cocktail Arcade Machine\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__title').outerText", + "required_contents": { + "exact_match": "real user feedback on Doc and Pies Arcade Factory Cocktail Arcade Machine" + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "must_include": [ + "Poorly Made Exterior. 
Consider a different Company.", + "piece of junk ,..can't believe I spent money on this !!!!", + "Based arrived broken but game itself works" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 675, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "HORI 3D Surround Gaming Neckset", + "rating": "2 stars and less" + }, + "intent": "Gather the titles of HORI 3D Surround Gaming Neckset reviews with 2 stars and less rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on HORI 3D Surround Gaming Neckset\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__title').outerText", + "required_contents": { + "exact_match": "real user feedback on HORI 3D Surround Gaming Neckset" + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "must_include": [ + "Not worth it for PC users", + "I really wanted to like this.", + "I wish this was better..." 
+ ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 676, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "suspected of being fraudulent" + }, + "intent": "Lookup orders that are suspected of being fraudulent", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": { + "must_include": [ + "Suspected Fraud" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 677, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "processing" + }, + "intent": "Lookup orders that are processing", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": { + "must_include": [ + "Processing" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 678, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Lookup orders that 
are {{status}}", + "instantiation_dict": { + "status": "canceled" + }, + "intent": "Lookup orders that are canceled", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": { + "must_include": [ + "Canceled" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 679, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "completed" + }, + "intent": "Lookup orders that are completed", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": { + "must_include": [ + "Completed" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 680, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "on hold" + }, + "intent": "Lookup orders that are on hold", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales/order/", + "program_html": [ + { + "url": "last", + "locator": 
"document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": { + "must_include": [ + "On Hold" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 681, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a GitLab repository related to {{topic}} and make a Reddit post linking to it in a relevant subreddit", + "instantiation_dict": { + "topic": "gan implementation" + }, + "intent": "Find a GitLab repository related to gan implementation and make a Reddit post linking to it in a relevant subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/machinelearning |OR| __REDDIT__/f/deeplearning |OR| __REDDIT__/f/technology", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "gan implementation", + "__GITLAB__/eriklindernoren/PyTorch-GAN" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 116 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 682, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a GitLab repository related to {{topic}} and make a Reddit post linking to it in a relevant subreddit", + "instantiation_dict": { + "topic": "chatGPT" + }, + "intent": "Find a GitLab repository related to chatGPT and make a Reddit post linking to it in a relevant subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/machinelearning |OR| __REDDIT__/f/singularity |OR| 
__REDDIT__/f/deeplearning |OR| __REDDIT__/f/technology", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "chatgpt", + "__GITLAB__/convexegg/chatgpt" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 116 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 683, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a GitLab repository related to {{topic}} and make a Reddit post linking to it in a relevant subreddit", + "instantiation_dict": { + "topic": "metaseq" + }, + "intent": "Find a GitLab repository related to metaseq and make a Reddit post linking to it in a relevant subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/machinelearning |OR| __REDDIT__/f/deeplearning", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "metaseq", + "__GITLAB__/root/metaseq" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 116 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 684, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "byteblaze/cloud-to-butt", + "subreddit": "LifeProTips" + }, + "intent": "Promote byteblaze/cloud-to-butt to subreddit LifeProTips with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": 
"__REDDIT__/f/LifeProTips", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText + [...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "Chrome extension that replaces occurrences of 'the cloud' with 'my butt'", + "__GITLAB__/byteblaze/cloud-to-butt" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 685, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "byteblaze/dotfiles", + "subreddit": "aww" + }, + "intent": "Promote byteblaze/dotfiles to subreddit aww with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/aww", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText + [...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "Computer setup", + "__GITLAB__/byteblaze/dotfiles" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 686, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": 
"auth0/angular-storage", + "subreddit": "technology" + }, + "intent": "Promote auth0/angular-storage to subreddit technology with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/technology", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText + [...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "A storage library for AngularJS done right", + "__GITLAB__/auth0/angular-storage" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 687, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "koush/AndroidAsync", + "subreddit": "funny" + }, + "intent": "Promote koush/AndroidAsync to subreddit funny with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/funny", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText + [...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "Asynchronous socket, http(s) (client+server) and websocket library for android. 
Based on nio, not threads.", + "__GITLAB__/koush/AndroidAsync" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 688, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "lahwaacz/arch-wiki-docs", + "subreddit": "science" + }, + "intent": "Promote lahwaacz/arch-wiki-docs to subreddit science with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/science", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText + [...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "A script to download pages from Arch Wiki for offline browsing", + "__GITLAB__/lahwaacz/arch-wiki-docs" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "shopping" + ], + "task_id": 689, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "I am a loyal customer" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as I am a loyal customer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact", + "program_html": [ + { 
+ "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "coupon", + "I am a loyal customer" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping" + ], + "task_id": 690, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "they promised me a coupon last time" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as they promised me a coupon last time", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "coupon", + "promised" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping" + ], + "task_id": 691, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "I plan to make a bulk purchase" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as I plan to make a bulk purchase", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your 
mind?\"').value", + "required_contents": { + "must_include": [ + "coupon", + "bulk purchase" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping" + ], + "task_id": 692, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "I am a student" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as I am a student", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "coupon", + "student" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping" + ], + "task_id": 693, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "my refund is suppoed to be replaced by a coupon" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as my refund is suppoed to be replaced by a coupon", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": { + "must_include": [ + "coupon", + "refund" + ] + } + } + ], + 
"url_note": "GOLD in PRED" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 694, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "Energy-Bulk Women Shirt", + "stock": "50", + "size": "S", + "color": "blue", + "price": "60" + }, + "intent": "Add a simple product named Energy-Bulk Women Shirt with 50 in stock, available in size S and color blue, priced at $60", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/catalog/product", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "60.00" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').value", + "required_contents": { + "must_include": [ + "Energy-Bulk Women Shirt" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "50" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": { + "must_include": [ + "top" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[size]\"').value", + "required_contents": { + "exact_match": "167" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": { + "exact_match": "50" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": { + 
"must_include": [ + "tops" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 695, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "Energy-Bulk Man Yoga Pant", + "stock": "50", + "size": "38", + "color": "yellow", + "price": "69.99" + }, + "intent": "Add a simple product named Energy-Bulk Man Yoga Pant with 50 in stock, available in size 38 and color yellow, priced at $69.99", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/catalog/product", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "69.99" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').value", + "required_contents": { + "must_include": [ + "Energy-Bulk Man Yoga Pant" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "50" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": { + "must_include": [ + "bottom" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[size]\"').value", + "required_contents": { + "exact_match": "179" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": { + "exact_match": "60" + } + }, + { + "url": "last", + "locator": 
"document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": { + "must_include": [ + "bottoms" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 696, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "FancyBoy Man Causal Jeans", + "stock": "42", + "size": "34", + "color": "Blue", + "price": "169.99" + }, + "intent": "Add a simple product named FancyBoy Man Causal Jeans with 42 in stock, available in size 34 and color Blue, priced at $169.99", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/catalog/product", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').value", + "required_contents": { + "must_include": [ + "FancyBoy Man Causal Jeans" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "42" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "169.99" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": { + "must_include": [ + "bottom" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[size]\"').value", + "required_contents": { + "exact_match": "177" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + 
"required_contents": { + "exact_match": "50" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": { + "must_include": [ + "bottoms" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 697, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "Swaatch Smart Watch", + "stock": "42", + "size": "uni-size", + "color": "Blue", + "price": "769.99" + }, + "intent": "Add a simple product named Swaatch Smart Watch with 42 in stock, available in size uni-size and color Blue, priced at $769.99", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/catalog/product", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').value", + "required_contents": { + "must_include": [ + "Swaatch Smart Watch" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "42" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "769.99" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": { + "must_include": [ + "gear" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": { + "exact_match": "50" + } + }, + { + "url": "last", + "locator": 
"document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": { + "must_include": [ + "watches" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 698, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "Lelelumon Yoga Mat", + "stock": "42", + "size": "uni-size", + "color": "black", + "price": "769.99" + }, + "intent": "Add a simple product named Lelelumon Yoga Mat with 42 in stock, available in size uni-size and color black, priced at $769.99", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/catalog/product", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').value", + "required_contents": { + "must_include": [ + "Lelelumon Yoga Mat" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "42" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "769.99" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": { + "must_include": [ + "gear" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": { + "exact_match": "49" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": 
{ + "must_include": [ + "fitness equipment" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 699, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "spring sale", + "rule": "a 20 percent discount site-wide" + }, + "intent": "Draft a new marketing price rule for spring sale that offers a 20 percent discount site-wide for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales_rule/promo_quote", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": { + "must_include": [ + "spring sale" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": { + "exact_match": "0" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "by_percent" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "20" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 258 + }, + { + "sites": [ + 
"shopping_admin" + ], + "task_id": 700, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "fall discount", + "rule": "$10 discount on checkout" + }, + "intent": "Draft a new marketing price rule for fall discount that offers $10 discount on checkout for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales_rule/promo_quote", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": { + "must_include": [ + "fall discount" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": { + "exact_match": "0" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "cart_fixed" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "10" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 701, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + 
"geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "Mother's day sale", + "rule": "$15 discount on checkout" + }, + "intent": "Draft a new marketing price rule for Mother's day sale that offers $15 discount on checkout for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales_rule/promo_quote", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": { + "must_include": [ + "Mother's day sale" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": { + "exact_match": "0" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "cart_fixed" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "15" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 702, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + 
"instantiation_dict": { + "topic": "Pride Month", + "rule": "45% off on all products" + }, + "intent": "Draft a new marketing price rule for Pride Month that offers 45% off on all products for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales_rule/promo_quote", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": { + "must_include": [ + "Pride Month" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": { + "exact_match": "0" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "by_percent" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "45" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 703, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "Thanks giving sale", + "rule": "$40 discount on checkout" + }, + "intent": "Draft a new marketing price rule for Thanks giving sale 
that offers $40 discount on checkout for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales_rule/promo_quote", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": { + "must_include": [ + "Thanks giving sale" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": { + "exact_match": "0" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "cart_fixed" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "prep_actions": ["document.querySelector('[data-index=\"actions\"]').querySelector('.admin__collapsible-title').click()"], + "required_contents": { + "exact_match": "40" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 704, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "sales order report", + "time_span": "for last month" + }, + "intent": "Today is 3/15/2023, generate a sales order report for last month", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": 
"__SHOPPING_ADMIN__/reports/report_sales/sales", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "2/1/23" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "2/28/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 705, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "sales order report", + "time_span": "over the last 45 days" + }, + "intent": "Today is 3/15/2023, generate a sales order report over the last 45 days", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/sales", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "1/29/23" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "3/15/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 706, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "refund report", + "time_span": "for Q1" + }, + "intent": "Today is 3/15/2023, generate a refund report for Q1", + "require_reset": false, + "eval": { + "eval_types": [ + 
"url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/refunded", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "1/1/23" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "3/31/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 707, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "sales order report", + "time_span": "for last year" + }, + "intent": "Today is 3/15/2023, generate a sales order report for last year", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/sales", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "1/1/2022" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "12/31/2022" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 708, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "tax report", + "time_span": "for this year" + }, + "intent": "Today is 3/15/2023, generate a 
tax report for this year", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/tax/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "1/1/2023" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "12/31/2023" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 709, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Create an {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "orders", + "start_date": "beginning of May 2021", + "end_date": "end of March 2022" + }, + "intent": "Create an orders report from beginning of May 2021 to end of March 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/sales", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "5/1/2021" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "3/31/2022" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 710, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}", 
+ "instantiation_dict": { + "type": "shipping", + "start_date": "08/05/2022", + "end_date": "03/01/2023" + }, + "intent": "Create a shipping report from 08/05/2022 to 03/01/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/shipping", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "8/5/22" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "3/1/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 711, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "product view", + "start_date": "07/05/2021", + "end_date": "05/31/2023" + }, + "intent": "Create a product view report from 07/05/2021 to 05/31/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_product/viewed/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "7/5/21" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "5/31/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 712, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": 
"__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "coupons", + "start_date": "05/01/2021", + "end_date": "05/15/2023" + }, + "intent": "Create a coupons report from 05/01/2021 to 05/15/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/coupons/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "5/1/21" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "5/15/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 713, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "best sellers", + "start_date": "05/01/2022", + "end_date": "05/31/2023" + }, + "intent": "Create a best sellers report from 05/01/2022 to 05/31/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/bestsellers/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "5/1/22" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "5/31/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "reddit" 
+ ], + "task_id": 714, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "subreddit": "gadgets", + "k": "1" + }, + "intent": "Thumbs down the top 1 post ever in gadgets.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/gadgets/19459/a-custom-gaming-pc-built-inside-a-vintage-1940s-motorola", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 715, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "subreddit": "history", + "k": "2" + }, + "intent": "Thumbs down the top 2 post ever in history.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/history/84338/the-scientist-who-discovered-sperm-was-so-grossed-out-he", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/history/105990/4-500-year-old-sumerian-temple-dedicated-to-mighty-thunder", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 24 + 
}, + { + "sites": [ + "reddit" + ], + "task_id": 716, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "k": "3", + "subreddit": "books" + }, + "intent": "Thumbs down the top 3 post ever in books.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/books/81371/the-letters-of-t-s-eliot-to-emily-hale-that-were-kept-sealed", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/books/59421/friendly-reminder-bookshop-org-exists", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/books/59447/appalachian-prison-book-project-seeks-notebook-donations-the", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 717, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "k": "4", + "subreddit": "movies" + }, + "intent": "Thumbs down the top 4 post ever in movies.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"__REDDIT__/f/movies/86174/who-will-win-the-oscar-for-actress-in-a-supporting-role", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/movies/86029/who-will-win-the-oscar-for-film-editing", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/movies/86055/cindy-williams-dies-laverne-amp-shirley-star-who-appeared-in", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/movies/42682/michelle-yeoh-to-receive-palm-springs-film-festival-s", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 718, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "k": "5", + "subreddit": "technology" + }, + "intent": "Thumbs down the top 5 post ever in technology.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/technology/48670/brain-cancer-vaccine-succeeds-at-prolonging-survival-in", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote 
vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/technology/134696/india-cuts-internet-for-27-million-people-amid-search-for", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/technology/48785/us-judge-orders-amazon-to-cease-and-desist-anti-union", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/technology/70354/activision-s-boston-studio-workers-announce-unionization", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/technology/70233/social-media-influencers-are-charged-with-feeding-followers", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 719, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "subreddit": "wallstreetbets", + "user": "ThetaGang_wsb" + }, + "intent": "Like all submissions created by ThetaGang_wsb in subreddit wallstreetbets", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/wallstreetbets/29478/how-will-airbnb-close-following-their-earnings-report-on", + "locator": 
"document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/wallstreetbets/29458/how-much-will-the-federal-reserve-raise-interest-rates-in", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 720, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "subreddit": "earthporn", + "user": "CameronKelsey" + }, + "intent": "Like all submissions created by CameronKelsey in subreddit earthporn", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/EarthPorn/98332/my-favorite-place-on-the-planet-henry-s-fork-of-the-snake", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/EarthPorn/98297/2-years-later-this-is-still-one-of-the-most-incredible", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/EarthPorn/98256/i-can-t-wait-for-all-this-green-to-start-coming-back-little", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + 
}, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 721, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "UniversityofBath", + "subreddit": "IAmA" + }, + "intent": "Like all submissions created by UniversityofBath in subreddit IAmA", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/IAmA/119742/hi-i-m-vienne-a-doctoral-student-at-the-university-of-bath-i", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/IAmA/119719/hello-reddit-i-m-nazia-mehrban-a-lecturer-in-biotechnology", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/IAmA/119714/i-m-ellie-jarvis-she-her-a-2nd-year-phd-student-in-the", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/IAmA/55155/hi-i-m-dr-lucy-maddox-from-bath-university-uk-i-m-a-clinical", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/IAmA/55142/we-re-sadeka-nujhat-hannah-leese-and-sandhya-moise-from-the", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + 
"required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/IAmA/34032/we-re-sandhya-moise-david-phillips-and-chan-lee-from-the", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/IAmA/13175/hi-i-m-kit-yates-i-m-a-mathematical-biologist-at-the", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/IAmA/13170/hello-i-m-dr-sara-fontani-from-the-university-of", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 722, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "Don_Gato1", + "subreddit": "new york" + }, + "intent": "Like all submissions created by Don_Gato1 in subreddit new york", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/nyc/44650/fox-news-hosts-cast-new-york-as-crime-ridden-and-chaotic", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 723, + "require_login": true, + "storage_state": 
"./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "FTorrez81", + "subreddit": "iphone13" + }, + "intent": "Like all submissions created by FTorrez81 in subreddit iphone13", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "reference_answer_raw_annotation": "N/A", + "string_note": "FTorrez81 does not have any submissions in iphone13" + }, + "intent_template_id": 25, + "string_note": "FTorrez81 has no submissions in subreddit iphone13" + }, + { + "sites": [ + "reddit" + ], + "task_id": 724, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "Hrekires", + "subreddit": "news" + }, + "intent": "Like all submissions created by Hrekires in subreddit news", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/news/129816/gov-whitmer-signs-bills-to-repeal-right-to-work-restore", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/129808/disney-world-deal-with-union-will-raise-minimum-wage-to-18", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/129794/judge-halts-wyoming-abortion-ban-days-after-it-took-effect", + "locator": 
"document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/129783/don-t-say-gay-lawmaker-pleads-guilty-to-covid-relief-fraud", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/129594/arizona-gov-katie-hobbs-refuses-to-proceed-with-execution", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/129508/tennessee-governor-oks-bill-to-cut-nashville-council-in-half", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/43839/philadelphia-da-larry-krasner-impeached-by-pa-house", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/43781/crypto-giant-ftx-to-file-for-bankruptcy-ceo-sam-bankman", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/43572/sec-doj-investigating-crypto-platform-ftx", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": 
"__REDDIT__/f/news/43558/kansas-gov-laura-kelly-wins-re-election-defeating-gop", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 725, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "subreddit": "massachusetts", + "user": "RickyDontLoseThat" + }, + "intent": "DisLike all submissions created by RickyDontLoseThat in subreddit massachusetts", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/massachusetts/84954/the-last-of-lincoln", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 726, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "subreddit": "earthporn", + "user": "jacyanthis" + }, + "intent": "DisLike all submissions created by jacyanthis in subreddit earthporn", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "jacyanthis does not have any submissions in earthporn", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 1510 + }, + 
{ + "sites": [ + "reddit" + ], + "task_id": 727, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "PatientBuilder499", + "subreddit": "videos" + }, + "intent": "DisLike all submissions created by PatientBuilder499 in subreddit videos", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/videos/115139/hundreds-of-civilian-turkish-volunteers-waiting-to-be-sent", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 728, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "sirbarani", + "subreddit": "sports" + }, + "intent": "DisLike all submissions created by sirbarani in subreddit sports", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/sports/48303/iran-football-legend-daei-will-not-attend-world-cup-amid", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 729, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", 
+ "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "AdamCannon", + "subreddit": "UpliftingNews" + }, + "intent": "DisLike all submissions created by AdamCannon in subreddit UpliftingNews", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/UpliftingNews/16087/same-sex-marriage-is-now-legal-in-all-of-mexico-s-states", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 730, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "Hrekires", + "subreddit": "news" + }, + "intent": "DisLike all submissions created by Hrekires in subreddit news", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/news/129816/gov-whitmer-signs-bills-to-repeal-right-to-work-restore", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/129808/disney-world-deal-with-union-will-raise-minimum-wage-to-18", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": 
"__REDDIT__/f/news/129794/judge-halts-wyoming-abortion-ban-days-after-it-took-effect", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/129783/don-t-say-gay-lawmaker-pleads-guilty-to-covid-relief-fraud", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/129594/arizona-gov-katie-hobbs-refuses-to-proceed-with-execution", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/129508/tennessee-governor-oks-bill-to-cut-nashville-council-in-half", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/43839/philadelphia-da-larry-krasner-impeached-by-pa-house", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/43781/crypto-giant-ftx-to-file-for-bankruptcy-ceo-sam-bankman", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/43572/sec-doj-investigating-crypto-platform-ftx", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote 
vote--user-downvoted" + ] + } + }, + { + "url": "__REDDIT__/f/news/43558/kansas-gov-laura-kelly-wins-re-election-defeating-gop", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 731, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "Nvidia RTX 4090", + "content": "EDIT: This news aged well" + }, + "intent": "Edit my post on Nvidia RTX 4090 by adding a line to the body that says \"EDIT: This news aged well\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/MachineLearning/1/nvidia-rtx-4090", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "must_include": [ + "EDIT: This news aged well", + "Crazy device for ML!" + ] + } + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "reddit" + ], + "task_id": 732, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "The Night Agent", + "content": "Done watching, pretty cool!" 
+ }, + "intent": "Edit my post on The Night Agent by adding a line to the body that says \"Done watching, pretty cool!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/television/134868/the-night-agent-renewed-for-season-2-at-netflix", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "exact_match": "Done watching, pretty cool!" + } + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "reddit" + ], + "task_id": 733, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "Star Trek Starfleet Academy series", + "content": "Every watch makes me feel like a kid again" + }, + "intent": "Edit my post on Star Trek Starfleet Academy series by adding a line to the body that says \"Every watch makes me feel like a kid again\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/television/135201/star-trek-starfleet-academy-series-from-alex-kurtzman-and", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "exact_match": "Every watch makes me feel like a kid again" + } + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "reddit" + ], + "task_id": 734, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "Ted Lasso", + "content": "Done watching. I love the renew!" 
+ }, + "intent": "Edit my post on Ted Lasso by adding a line to the body that says \"Done watching. I love the renew!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/television/135156/ted-lasso-season-3-premiere-scores-870k-u-s-households-up-59", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "exact_match": "Done watching. I love the renew!" + } + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "reddit" + ], + "task_id": 735, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "Lord of the Rings", + "content": "The cast is amazing!" + }, + "intent": "Edit my post on Lord of the Rings by adding a line to the body that says \"The cast is amazing!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/television/135152/lord-of-the-rings-the-rings-of-power-season-2-cast-adds", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "exact_match": "The cast is amazing!" 
+ } + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 736, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to MIT license.", + "instantiation_dict": { + "repo": "gimmiethat.space and dotfiles" + }, + "intent": "Make the LICENSE of gimmiethat.space and dotfiles to MIT license.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/blob/main/LICENSE", + "locator": "", + "required_contents": { + "must_include": [ + "MIT license", + "The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software." + ] + } + }, + { + "url": "__GITLAB__/byteblaze/dotfiles/-/blob/main/LICENSE", + "locator": "", + "required_contents": { + "must_include": [ + "MIT license", + "The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software." 
+ ] + } + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 737, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "Philadelphia 76ers", + "time": "" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of Philadelphia 76ers ", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Wells Fargo Center", + "South Philadelphia Sports Complex" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 738, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "Philadelphia 76ers", + "time": "in the 70th" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of Philadelphia 76ers in the 70th", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": 
"last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "3601 South Broad Street", + "South Philadelphia" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 739, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "Yankees", + "time": "in the 80th" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of Yankees in the 80th", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Yankee Stadium", + "East 161st Street" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 740, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + 
"intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "NYC NBA team", + "time": "" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of NYC NBA team ", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Madison Square Garden", + "Pennsylvania Plaza", + "Manhattan", + "New York" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 741, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "Boston home NBA team", + "time": "" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of Boston home NBA team ", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": 
"document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "TD Garden", + "Boston", + "Massachusetts" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 742, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"planner\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "private", + "account_list": "Abishek, Vinta" + }, + "intent": "Create a new private project \"planner\" and add Abishek, Vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/planner", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "private" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/planner/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@abisubramanya27", + "@vinta" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 743, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"web_arena\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "public", + "account_list": "Abishek, Vinta" + }, + "intent": "Create a new public project \"web_arena\" and add Abishek, Vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + 
"url": "__GITLAB__/byteblaze/web_arena", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "public" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/web_arena/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@abisubramanya27", + "@vinta" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 744, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"AutoAGI\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "public", + "account_list": "primer" + }, + "intent": "Create a new public project \"AutoAGI\" and add primer as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/AutoAGI", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "public" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/AutoAGI/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@primer" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 745, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"awesome-llms\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "public", + "account_list": "primer, convexegg, abishek" + }, + "intent": "Create a new public project \"awesome-llms\" and add primer, convexegg, abishek as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", 
+ "program_html": [ + { + "url": "__GITLAB__/byteblaze/awesome-llms", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "public" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/awesome-llms/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@primer", + "@convexegg", + "@abisubramanya27" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 746, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"llm_bulk_inference\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "private", + "account_list": "primer, convexegg, abishek" + }, + "intent": "Create a new private project \"llm_bulk_inference\" and add primer, convexegg, abishek as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/llm_bulk_inference", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/llm_bulk_inference/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@primer", + "@convexegg", + "@abisubramanya27" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 747, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "awesome_web_agents", + "template": "blank", + "account_list": "Abishek, Vinta" + }, + "intent": 
"Start a private project awesome_web_agents with blank template and add Abishek, Vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/awesome_web_agents", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/awesome_web_agents/-/commits", + "locator": "", + "required_contents": { + "must_include": [ + "Initial commit" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/awesome_web_agents/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@abisubramanya27", + "@vinta" + ] + } + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 748, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "web_agent_android_xl", + "template": "Android", + "account_list": "primer, convexegg, abishek" + }, + "intent": "Start a private project web_agent_android_xl with Android template and add primer, convexegg, abishek as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/web_agent_android_xl", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/web_agent_android_xl/-/commits", + "locator": "", + "required_contents": { + "must_include": [ + "Initialized from 'Android' project template" + ] + } + }, + { + "url": 
"__GITLAB__/byteblaze/web_agent_android_xl/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@primer", + "@convexegg", + "@abisubramanya27" + ] + } + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 749, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "project_site", + "template": "NodeJS", + "account_list": "primer, convexegg, vinta" + }, + "intent": "Start a private project project_site with NodeJS template and add primer, convexegg, vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/project_site", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/project_site/-/commits", + "locator": "", + "required_contents": { + "must_include": [ + "Initialized from 'NodeJS Express' project template" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/project_site/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@primer", + "@convexegg", + "@vinta" + ] + } + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 750, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "agi_index", + "template": "HTML", + "account_list": "Vinta Chen" + }, + "intent": "Start a private 
project agi_index with HTML template and add Vinta Chen as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/agi_index", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/agi_index", + "locator": "document.querySelector('.home-panel-description-markdown').outerText", + "required_contents": { + "must_include": [ + "Example plain HTML site using GitLab Pages: https://pages.gitlab.io/plain-html |OR| A plain HTML site that uses Netlify for CI/CD instead of GitLab, but still with all the other great GitLab features." + ] + } + }, + { + "url": "__GITLAB__/byteblaze/agi_index/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "Vinta Chen" + ] + } + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 751, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "AGISite", + "template": "JEKYLL", + "account_list": "Rohan and Vinta" + }, + "intent": "Start a private project AGISite with JEKYLL template and add Rohan and Vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/AGISite", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/AGISite", + "locator": 
"document.querySelector('.home-panel-description-markdown').outerText", + "required_contents": { + "must_include": [ + "Example Jekyll site using GitLab Pages: https://pages.gitlab.io/jekyll |OR| A Jekyll site that uses Netlify for CI/CD instead of GitLab, but still with all the other great GitLab features." + ] + } + }, + { + "url": "__GITLAB__/byteblaze/AGISite/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@Seirdy", + "@vinta" + ] + } + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 752, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "web_agent", + "template": "blank" + }, + "intent": "Create a private blank repository called \"web_agent\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/web_agent", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/web_agent/-/commits", + "locator": "", + "required_contents": { + "must_include": [ + "Initial commit" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 753, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "web_agent_android_xs", + "template": 
"Android" + }, + "intent": "Create a private Android repository called \"web_agent_android_xs\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/web_agent_android_xs", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/web_agent_android_xs/-/commits", + "locator": "", + "required_contents": { + "must_include": [ + "Initialized from 'Android' project template" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 754, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "web_agent_nodejs", + "template": "NodeJS" + }, + "intent": "Create a private NodeJS repository called \"web_agent_nodejs\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/web_agent_nodejs", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/web_agent_nodejs/-/commits", + "locator": "", + "required_contents": { + "must_include": [ + "Initialized from 'NodeJS Express' project template" + ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 755, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + 
"start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "web_agent_index", + "template": "HTML" + }, + "intent": "Create a private HTML repository called \"web_agent_index\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/web_agent_index", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/web_agent_index", + "locator": "document.querySelector('.home-panel-description-markdown').outerText", + "required_contents": { + "must_include": [ + "Example plain HTML site using GitLab Pages: https://pages.gitlab.io/plain-html |OR| A plain HTML site that uses Netlify for CI/CD instead of GitLab, but still with all the other great GitLab features." 
+ ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 756, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "11711_gitlab", + "template": "JEKYLL" + }, + "intent": "Create a private JEKYLL repository called \"11711_gitlab\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/11711_gitlab", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "__GITLAB__/byteblaze/11711_gitlab", + "locator": "document.querySelector('.home-panel-description-markdown').outerText", + "required_contents": { + "must_include": [ + "Example Jekyll site using GitLab Pages: https://pages.gitlab.io/jekyll |OR| A Jekyll site that uses Netlify for CI/CD instead of GitLab, but still with all the other great GitLab features." 
+ ] + } + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "map" + ], + "task_id": 757, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the path and travel time from {{city1}} to {{city2}}.", + "instantiation_dict": { + "city1": "home of the 1980 Super Bowl champions", + "city2": "home of the 1991 Super Bowl champions" + }, + "intent": "Show me the path and travel time from home of the 1980 Super Bowl champions to home of the 1991 Super Bowl champions.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "New York" + ] + } + } + ] + }, + "intent_template_id": 42 + }, + { + "sites": [ + "map" + ], + "task_id": 758, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the path and travel time from {{city1}} to {{city2}}.", + "instantiation_dict": { + "city1": "the big apple", + "city2": "biggest city in Maine" + }, + "intent": "Show me the path and travel time from the big apple to biggest city in Maine.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + 
"locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "New York" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Portland", + "Maine" + ] + } + } + ] + }, + "intent_template_id": 42 + }, + { + "sites": [ + "map", + "shopping_admin" + ], + "task_id": 759, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the route and driving time from {{city1}} to {{city2}}", + "instantiation_dict": { + "city1": "the city where my E-commerce customer Sophia Young lives", + "city2": "New York City" + }, + "intent": "Show me the route and driving time from the city where my E-commerce customer Sophia Young lives to New York City", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Boston" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "New York" + ] + } + } + ] + }, + "intent_template_id": 42 + }, + { + "sites": [ + "map", + "shopping_admin" + ], + "task_id": 760, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the route and driving time from {{city1}} to {{city2}}", + "instantiation_dict": { + "city1": "Allentown, PA", + "city2": "the city where my E-commerce customer Amanda Kim lives" + }, + "intent": "Show me the route and driving time from Allentown, PA to the city 
where my E-commerce customer Amanda Kim lives", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Allentown" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Hoboken", + "New Jersey" + ] + } + } + ] + }, + "intent_template_id": 42 + }, + { + "sites": [ + "map" + ], + "task_id": 761, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Get directions from {{location/address_1}} to {{location/address_2}} using {{transportation}} options.", + "instantiation_dict": { + "location/address_1": "Carnegie Science Museum", + "location/address_2": "Hunt library CMU", + "transportation": "walk" + }, + "intent": "Get directions from Carnegie Science Museum to Hunt library CMU using walk options.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "2" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Science Center", + "Allegheny County", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Hunt Library", + "Pittsburgh" + ] + } + } + ] + }, + 
"intent_template_id": 54 + }, + { + "sites": [ + "map" + ], + "task_id": 762, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Get directions from {{location/address_1}} to {{location/address_2}} using {{transportation}} options.", + "instantiation_dict": { + "location/address_1": "Carnegie Music Hall in NYC", + "location/address_2": "Carnegie Mellon University", + "transportation": "driving" + }, + "intent": "Get directions from Carnegie Music Hall in NYC to Carnegie Mellon University using driving options.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Hall", + "West 57th Street", + "Manhattan", + "New York" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + } + ] + }, + "intent_template_id": 54 + }, + { + "sites": [ + "map" + ], + "task_id": 763, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "Trader Joe's", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest Trader Joe's from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": 
"document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "2" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "401, Shady Avenue, Shadyside" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Trader Joe's, 6343, Penn Avenue, East Liberty" + ] + } + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "map" + ], + "task_id": 764, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "Target", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest Target from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "2" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "401, Shady Avenue, Shadyside" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Target, 6231, Penn Avenue, East Liberty" + ] + } + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "map" + ], + "task_id": 765, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "Japanese food market", + 
"location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest Japanese food market from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "2" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "401, Shady Avenue, Shadyside" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Tokyo Japanese Food Store, 5855, Ellsworth Avenue, Shadyside" + ] + } + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "map" + ], + "task_id": 766, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "grocessory owned by Amazon", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest grocessory owned by Amazon from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "2" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "401, Shady Avenue, Shadyside" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Whole Foods 
Market, 5700, Penn Avenue, East Liberty" + ] + } + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "map" + ], + "task_id": 767, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "chain grocessory owned by a local business", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest chain grocessory owned by a local business from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "2" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "401, Shady Avenue, Shadyside" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Giant Eagle, 5550, Centre Avenue, Shadyside" + ] + } + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 768, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "{{quantity}} {{product}} arrived, update the stock", + "instantiation_dict": { + "quantity": "5", + "product": "blue Cronus yoga pants with size 33" + }, + "intent": "5 blue Cronus yoga pants with size 33 arrived, update the stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/872/", + "locator": 
"document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "5" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/872/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 241 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 769, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "We've received {{quantity}} {{product}}, please update the inventory.", + "instantiation_dict": { + "quantity": "378", + "product": "brown Aero daily fitness tee in every size" + }, + "intent": "We've received 378 brown Aero daily fitness tee in every size, please update the inventory.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/544/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "478" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/547/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "478" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/550/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "478" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/553/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "478" + } + }, + { + "url": 
"__SHOPPING_ADMIN__/catalog/product/edit/id/556/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "478" + } + } + ] + }, + "intent_template_id": 241 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 770, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "We've received {{quantity}}, update the inventory.", + "instantiation_dict": { + "quantity": "12 white Cora parachute pant of size 28 and 56 blue of size 29" + }, + "intent": "We've received 12 white Cora parachute pant of size 28 and 56 blue of size 29, update the inventory.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1836/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "112" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1838/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "156" + } + } + ] + }, + "intent_template_id": 241 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 771, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Approve the positive reviews to display in our store.", + "instantiation_dict": {}, + "intent": "Approve the positive reviews to display in our store.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/352", + "locator": 
"document.querySelector('[name=\"status_id\"').value", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/349", + "locator": "document.querySelector('[name=\"status_id\"').value", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/347", + "locator": "document.querySelector('[name=\"status_id\"').value", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 243 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 772, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "pending negative reviews for Circe fleece" + }, + "intent": "Delete all pending negative reviews for Circe fleece", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/999", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 773, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "pending negative reviews" + }, + "intent": "Delete all pending negative reviews", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/351", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + }, + { + "url": 
"__SHOPPING_ADMIN__/review/product/edit/id/353", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 774, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "pending reviews with less than 4 stars" + }, + "intent": "Delete all pending reviews with less than 4 stars", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/351", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + }, + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/353", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + }, + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/349", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 775, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "reviews from the scammer Arden" + }, + "intent": "Delete all reviews from the scammer Arden", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/51", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + } + ] + }, + 
"intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 776, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "reviews from the scammer Carlo" + }, + "intent": "Delete all reviews from the scammer Carlo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/93", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + }, + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/109", + "locator": "", + "required_contents": { + "must_include": [ + "Rating isn't Available" + ] + } + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 777, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "$5", + "action": "Reduce", + "config": "green Hollister backyard sweater in all size" + }, + "intent": "Reduce the price of green Hollister backyard sweater in all size by $5", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/120/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "47.00" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/117/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "47.00" + } + }, + { + "url": 
"__SHOPPING_ADMIN__/catalog/product/edit/id/114/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "47.00" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/111/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "47.00" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/123/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "47.00" + } + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 778, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "13.5%", + "action": "Reduce", + "config": "size 28 Sahara leggings" + }, + "intent": "Reduce the price of size 28 Sahara leggings by 13.5%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1841/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "64.88" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1842/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "64.88" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1843/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "64.88" + } + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 779, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + 
"start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "15%", + "action": "Reduce", + "config": "yellow shirts from Gwyn Endurance in all size below L" + }, + "intent": "Reduce the price of yellow shirts from Gwyn Endurance in all size below L by 15%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1559/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "20.40" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1562/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "20.40" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1565/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "20.40" + } + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 780, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1481/", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "$17", + "action": "Increase", + "config": "white Ingrid Running with size L and above" + }, + "intent": "Increase the price of white Ingrid Running with size L and above by $17", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1264/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": 
"64.00" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1267/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "64.00" + } + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 781, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "37%", + "action": "Increase", + "config": "black fitness tshirts from Desiree with size XS" + }, + "intent": "Increase the price of black fitness tshirts from Desiree with size XS by 37%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1573/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "32.88" + } + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 782, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "23%", + "action": "Increase", + "config": "all blue running tshirts in extra small and small sizes" + }, + "intent": "Increase the price of all blue running tshirts in extra small and small sizes by 23%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/496/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "22.33" + 
} + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/499/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "22.33" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/479/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "21.56" + } + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/482/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "21.56" + } + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 783, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Add the following users to repo {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "kkroening/ffmpeg-python", + "role": "maintainer", + "user_list": [ + "yjlou", + "a11yproject" + ] + }, + "intent": "Add the following users to repo kkroening/ffmpeg-python as maintainer: ['yjlou', 'a11yproject']", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "not the owner of the repo so cannot add maintainer", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 351 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 784, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/CellularPrivacy/Android-IMSI-Catcher-Detector", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "main", + "attribute": "email address" + }, + "intent": "Tell me the email address of the contributor who has the most commits to 
branch main", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "secupwn@users.noreply.github.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "secupwn@users.noreply.github.com" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 785, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/CellularPrivacy/Android-IMSI-Catcher-Detector", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "gh-page", + "attribute": "email address" + }, + "intent": "Tell me the email address of the contributor who has the most commits to branch gh-page", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "secupwn@users.noreply.github.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "secupwn@users.noreply.github.com" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 786, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/vinta/awesome-python", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "main", + "attribute": "number of commits" + }, + "intent": "Tell me the number of commits of the contributor who has the most commits to branch main", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "412" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "412" 
+ }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 787, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/dehenne/awesome-visibility/-/graphs/master", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "main", + "attribute": "number of followers" + }, + "intent": "Tell me the number of followers of the contributor who has the most commits to branch main", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 788, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/amwhalen/archive-my-tweets/-/tree/github/fork/chtitux/addRssFeed", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "php52", + "attribute": "full name, gitlab account name, location and email address" + }, + "intent": "Tell me the full name, gitlab account name, location and email address of the contributor who has the most commits to branch php52", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Andrew M. Whalen", + "amwhalen", + "Massachusetts", + "github@amwhalen.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Andrew M. 
Whalen, amwhalen, Massachusetts, github@amwhalen.com" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 789, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create an issue {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "huggingface dataset", + "issue": "asking about do they have any plan on supporting Webagent in the next quater" + }, + "intent": "Create an issue asking about do they have any plan on supporting Webagent in the next quater in huggingface dataset.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "huggingface repo does not exist", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 790, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "negative reviews for Sybil running short" + }, + "intent": "Delete all negative reviews for Sybil running short", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "No negative reviews for sybil running short", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 246 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 791, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Make a folder named {{directory}} on the {{gitlab_repo}} repo and include a file called urls.txt that consists of the links to 
the 5 most recent posts from {{subreddit}}.", + "instantiation_dict": { + "directory": "car", + "subreddit": "cars", + "gitlab_repo": "gimmiethat.space" + }, + "intent": "Make a folder named car on the gimmiethat.space repo and include a file called urls.txt that consists of the links to the 5 most recent posts from cars.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "the subreddit does not exist", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 84 + }, + { + "sites": [ + "shopping" + ], + "task_id": 792, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "Beauty & Personal Care", + "dollar_value": "under 20" + }, + "intent": "Buy the highest rated product from the Beauty & Personal Care category within a budget under 20.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The website does not support sorting by rating and there are too many products in this category", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 793, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "Men clothing", + "dollar_value": "above 50 but under 129.99" + }, + "intent": "Buy the highest rated product 
from the Men clothing category within a budget above 50 but under 129.99.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The website does not support sorting by rating and there are too many products in this category", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 794, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "4000 Forbes Ave, Pittsburgh, PA" + }, + "intent": "Change the delivery address for my most recent order to 4000 Forbes Ave, Pittsburgh, PA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "shopping" + ], + "task_id": 795, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "6726 McPherson Blvd, Pittsburgh, PA" + }, + "intent": "Change the delivery address for my most recent order to 6726 McPherson Blvd, Pittsburgh, PA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the 
website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "shopping" + ], + "task_id": 796, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "155 5th Street, San Francisco, CA" + }, + "intent": "Change the delivery address for my most recent order to 155 5th Street, San Francisco, CA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "shopping" + ], + "task_id": 797, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "3 Oxford St, Cambridge, MA" + }, + "intent": "Change the delivery address for my most recent order to 3 Oxford St, Cambridge, MA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "shopping" + ], + "task_id": 798, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + 
"instantiation_dict": { + "address": "77 Massachusetts Ave, Cambridge, MA" + }, + "intent": "Change the delivery address for my most recent order to 77 Massachusetts Ave, Cambridge, MA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 799, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "n-lab", + "members": "patou, egpast, westurner, jontutcher" + }, + "intent": "create a new group \"n-lab\" with members patou, egpast, westurner, jontutcher", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/groups/n-lab/-/group_members", + "locator": "", + "required_contents": { + "must_include": [ + "@patou", + "@egpast", + "@westurner", + "@jontutcher" + ] + } + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 800, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "x-lab", + "members": "JonasVautherin, dilipchandima, dawiss1337, bmyun, DCMJY" + }, + "intent": "create a new group \"x-lab\" with members JonasVautherin, dilipchandima, dawiss1337, bmyun, DCMJY", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + 
"program_html": [ + { + "url": "__GITLAB__/groups/x-lab/-/group_members", + "locator": "", + "required_contents": { + "must_include": [ + "@JonasVautherin", + "@dilipchandima", + "@dawiss1337", + "@bmyun", + "@DCMJY" + ] + } + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 801, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "crew", + "members": "ASWATFZLLC, patrickhlauke, westurner, linkmatrix" + }, + "intent": "create a new group \"crew\" with members ASWATFZLLC, patrickhlauke, westurner, linkmatrix", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/groups/crew/-/group_members", + "locator": "", + "required_contents": { + "must_include": [ + "@ASWATFZLLC", + "@patrickhlauke", + "@westurner", + "@linkmatrix" + ] + } + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 802, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "coding_friends", + "members": "qhduan, Agnes-U" + }, + "intent": "create a new group \"coding_friends\" with members qhduan, Agnes-U", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/groups/coding_friends/-/group_members", + "locator": "", + "required_contents": { + "must_include": [ + "@qhduan", + "@Agnes-U" + ] + } + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 803, + "require_login": true, + 
"storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "webagent", + "members": "pandey2000, sayakpaul, sayakpaul" + }, + "intent": "create a new group \"webagent\" with members pandey2000, sayakpaul, sayakpaul", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/groups/webagent/-/group_members", + "locator": "", + "required_contents": { + "must_include": [ + "@pandey2000", + "@sayakpaul" + ] + } + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 804, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} to {{account}}.", + "instantiation_dict": { + "issue": "flash alerts", + "account": "myself and primer" + }, + "intent": "Assign the issue regarding flash alerts to myself and primer.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=byteblaze", + "locator": "", + "required_contents": { + "must_include": [ + "Add documentation on using Flash alerts in dialog components", + "Clarify usage of flash alert" + ] + } + }, + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=primer", + "locator": "", + "required_contents": { + "must_include": [ + "Add documentation on using Flash alerts in dialog components", + "Clarify usage of flash alert" + ] + } + } + ] + }, + "intent_template_id": 999 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 805, + "require_login": true, + "storage_state": 
"./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "the branch that implements the support of template strings", + "target_branch": "master", + "reviewer": "myself and Roshan" + }, + "intent": "Submit a merge request for the branch that implements the support of template strings to be merged into master branch, assign myself and Roshan as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": { + "exact_match": "feature/replace-gulp" + } + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": { + "exact_match": "main" + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": { + "must_include": [ + "Byte Blaze", + "Roshan Jossy" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 806, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "a11yproject.com/redesign", + "target_branch": "markdown-figure-block", + "reviewer": "myself" + }, + "intent": "Submit a merge request for 
a11yproject.com/redesign branch to be merged into markdown-figure-block branch, assign myself as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": { + "exact_match": "redesign" + } + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": { + "exact_match": "feature/markdown-figure-block" + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": { + "must_include": [ + "Byte Blaze" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 807, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "build time debug", + "target_branch": "main", + "reviewer": "myself" + }, + "intent": "Submit a merge request for build time debug branch to be merged into main branch, assign myself as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": { + "exact_match": "main" + } + }, + { + "url": "last", + "locator": 
"document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": { + "exact_match": "debug-build-time" + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": { + "must_include": [ + "Byte Blaze" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 808, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "cloud-to-butt", + "issue": "Let's keep the project alive", + "account": "myself", + "due": "the end of Q1 2033" + }, + "intent": "Create an issue in cloud-to-butt repo with title \"Let's keep the project alive\". Assign the issue to myself. Set due date to be the end of Q1 2033", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/cloud-to-butt/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[data-qa-selector=\"title_content\"]').outerText", + "required_contents": { + "exact_match": "Let's keep the project alive" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": { + "must_include": [ + "Mar 31, 2033" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": { + "must_include": [ + "Byte Blaze" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 809, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": 
"__GITLAB__", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "a11yproject", + "issue": "404 for many URLs", + "account": "myself", + "due": "2030-1-3" + }, + "intent": "Create an issue in a11yproject repo with title \"404 for many URLs\". Assign the issue to myself. Set due date to be 2030-1-3", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[data-qa-selector=\"title_content\"]').outerText", + "required_contents": { + "exact_match": "404 for many URLs" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": { + "must_include": [ + "Jan 3, 2030" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": { + "must_include": [ + "Byte Blaze" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 810, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} in {{repo}} repo to {{account}}.", + "instantiation_dict": { + "repo": "primer design guide", + "issue": "flash alert", + "account": "myself" + }, + "intent": "Assign the issue regarding flash alert in primer design guide repo to myself.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=byteblaze", + "locator": "", + 
"required_contents": { + "must_include": [ + "Add documentation on using Flash alerts in dialog components", + "Clarify usage of flash alert" + ] + } + } + ] + }, + "intent_template_id": 999 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 811, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} in {{repo}} to {{account}}.", + "instantiation_dict": { + "repo": "a11yproject", + "issue": 404, + "account": "myself" + }, + "intent": "Assign the issue regarding 404 in a11yproject to myself.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=byteblaze", + "locator": "", + "required_contents": { + "must_include": [ + "404s, bad host, timeouts, bad urls for URLs linked from website" + ] + } + } + ] + }, + "intent_template_id": 999 + } +] diff --git a/VAB-WebArena-Lite/config_files/wa/test_webarena_lite.json b/VAB-WebArena-Lite/config_files/wa/test_webarena_lite.json new file mode 100644 index 0000000..ca37c1e --- /dev/null +++ b/VAB-WebArena-Lite/config_files/wa/test_webarena_lite.json @@ -0,0 +1,5838 @@ +[ + { + "sites": [ + "shopping_admin" + ], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What are the top-{{n}} best-selling product in {{period}}", + "instantiation_dict": { + "n": 3, + "period": "Jan 2023" + }, + "intent": "What are the top-3 best-selling product in Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Impulse Duffle", + "Overnight Duffle", + "Hawkeye Yoga Short-32-Blue" + ] + }, + "reference_url": "", + "program_html": 
[], + "string_note": "", + "reference_answer_raw_annotation": "Impulse Duffle, Overnight Duffle, Hawkeye Yoga Short-32-Blue" + }, + "intent_template_id": 279, + "old_task_id": 4 + }, + { + "sites": [ + "map" + ], + "task_id": 1, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}", + "instantiation_dict": { + "airport_type": "international airports", + "start": "Carnegie Mellon University", + "radius": "50 km" + }, + "intent": "Tell me the full address of all international airports that are within a driving distance of 50 km to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Pittsburgh International Airport, Southern Beltway, Findlay Township, Allegheny County, 15231, United States" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Pittsburgh International Airport People Movers, Airport Boulevard, Findlay Township, Allegheny County, Pennsylvania, 15231, United States" + }, + "intent_template_id": 79, + "old_task_id": 7 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 2, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "best" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"best\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + 
"reference_answer_raw_annotation": "2" + }, + "intent_template_id": 288, + "old_task_id": 15 + }, + { + "sites": [ + "map" + ], + "task_id": 3, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Compare the difference in time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "Randyland", + "end": "Carnegie Mellon University" + }, + "intent": "Compare the difference in time for walking and driving route from Randyland to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 13min", + "walking: 1h 45min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "driving: 13min, walking: 1h 45min." + }, + "intent_template_id": 73, + "old_task_id": 20 + }, + { + "sites": [ + "shopping" + ], + "task_id": 4, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/3-pack-samsung-galaxy-s6-screen-protector-nearpow-tempered-glass-screen-protector-with-9h-hardness-crystal-clear-easy-bubble-free-installation-scratch-resist.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "good fingerprint resistant" + }, + "intent": "List out reviewers, if exist, who mention about good fingerprint resistant", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Rachel", + "T. Gannon" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Rachel, T. 
Gannon, " + }, + "intent_template_id": 222, + "old_task_id": 23 + }, + { + "sites": [ + "reddit" + ], + "task_id": 5, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "Showerthoughts" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the Showerthoughts forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 33, + "old_task_id": 27 + }, + { + "sites": [ + "map" + ], + "task_id": 6, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I will arrive {{place}} soon. Provide the name of a {{target1}} in the vicinity, if available. Then, tell me the {{information}} to {{target2}} from the hotel.", + "instantiation_dict": { + "place": "Pittsburgh Airport", + "target1": "Hilton hotel", + "information": "the shortest walking distance", + "target2": "a supermarket" + }, + "intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hilton hotel in the vicinity, if available. 
Then, tell me the the shortest walking distance to a supermarket from the hotel.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "DoubleTree by Hilton Hotel Pittsburgh Airport", + "1.4km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "DoubleTree by Hilton Hotel Pittsburgh Airport Distance: 1.4km" + }, + "intent_template_id": 78, + "old_task_id": 33 + }, + { + "sites": [ + "map" + ], + "task_id": 7, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "police station", + "location": "gates building at CMU" + }, + "intent": "Check if the police station in pittsburgh can be reached in one hour by car from gates building at CMU", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Yes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77, + "old_task_id": 37 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 8, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "List the top {{n}} search terms in my store", + "instantiation_dict": { + "n": "3" + }, + "intent": "List the top 3 search terms in my store", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "hollister", + "Joust Bag", + "nike" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "hollister, Joust Bag, nike" + }, + 
"intent_template_id": 285, + "old_task_id": 43 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 9, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Check out my todos", + "instantiation_dict": {}, + "intent": "Check out my todos", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/dashboard/todos", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 303, + "old_task_id": 44 + }, + { + "sites": [ + "shopping" + ], + "task_id": 10, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past three days" + }, + "intent": "Today is 6/12/2023. 
Tell me how many fulfilled orders I have over the past three days, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "0 order", + "$0 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0 order, $0 total spend" + }, + "intent_template_id": 197, + "old_task_id": 48 + }, + { + "sites": [ + "map" + ], + "task_id": 11, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "Carnegie Museum of Art", + "end": "a library at CMU" + }, + "intent": "How long does it take to walk from Carnegie Museum of Art to a library at CMU?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "11 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "11 min" + }, + "intent_template_id": 68, + "old_task_id": 56 + }, + { + "sites": [ + "map" + ], + "task_id": 12, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "cafe", + "place2": "CMU Hunt library" + }, + "intent": "Tell me the closest cafe(s) to CMU Hunt library", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "De Fer Coffee & Tea" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "De Fer Coffee & Tea" + }, + "intent_template_id": 69, + "old_task_id": 58 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 13, + 
"require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Which customer has completed the {{quantifier}} number of orders in the entire history?", + "instantiation_dict": { + "quantifier": "fifth most" + }, + "intent": "Which customer has completed the fifth most number of orders in the entire history?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Matt Baker" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Matt Baker" + }, + "intent_template_id": 276, + "old_task_id": 65 + }, + { + "sites": [ + "reddit" + ], + "task_id": 14, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}", + "instantiation_dict": { + "number": 10, + "subreddit": "books", + "description": "is there any post talks about supporting local book stores? If so, tell me the organizations involved" + }, + "intent": "Among the top 10 post in \"books\" forum, is there any post talks about supporting local book stores? 
If so, tell me the organizations involved", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "bookshop.org" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "bookshop.org" + }, + "intent_template_id": 17, + "old_task_id": 69 + }, + { + "sites": [ + "map" + ], + "task_id": 15, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the zip code of {{place}}?", + "instantiation_dict": { + "place": "Chatham University" + }, + "intent": "What is the zip code of Chatham University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "15232" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "15232" + }, + "intent_template_id": 70, + "old_task_id": 71 + }, + { + "sites": [ + "map" + ], + "task_id": 16, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Given the following locations, {{place_list}}, what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.", + "instantiation_dict": { + "place_list": [ + "Massachusetts Institute of Technology", + "Harvard University", + "Boston Logan International Airport" + ] + }, + "intent": "Given the following locations, ['Massachusetts Institute of Technology', 'Harvard University', 'Boston Logan International Airport'], what would be the optimal route to travel through them all in order to minimize total travel time? 
Please note the journey begins at the first place listed.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "The order is Massachusetts Institute of Technology, Harvard University, Boston Logan International Airport" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Massachusetts Institute of Technology, Harvard University, Boston Logan International Airport" + }, + "intent_template_id": 65, + "old_task_id": 75 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 17, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What is the total count of {{status}} reviews amongst all the reviews?", + "instantiation_dict": { + "status": "Pending" + }, + "intent": "What is the total count of Pending reviews amongst all the reviews?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "5" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5" + }, + "intent_template_id": 277, + "old_task_id": 77 + }, + { + "sites": [ + "map" + ], + "task_id": 18, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?", + "instantiation_dict": { + "place_A": "Massachusetts Institute of Technology", + "place_B": "Harvard University", + "place_C": "Boston Logan International Airport" + }, + "intent": "What is the duration required to first walk from Massachusetts Institute of Technology to Harvard University, and then drive to Boston Logan International Airport?", + "require_reset": false, + "eval": { + 
"eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "64 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "63 min" + }, + "intent_template_id": 72, + "old_task_id": 82 + }, + { + "sites": [ + "map" + ], + "task_id": 19, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "Homewood Suites Southpointe", + "place": "PPG Paints Arena" + }, + "intent": "From my stay at Homewood Suites Southpointe, what's the estimated driving time to reach PPG Paints Arena?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "34 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "34 minutes" + }, + "intent_template_id": 64, + "old_task_id": 88 + }, + { + "sites": [ + "map" + ], + "task_id": 20, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "New Hampshire" + }, + "intent": "Which US states border New Hampshire?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Massachusetts", + "Vermont", + "Maine" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Massachusetts, Vermont, Maine" + }, + "intent_template_id": 67, + "old_task_id": 93 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 21, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": 
null, + "intent_template": "Telll me the grand total of invoice {{id}}.", + "instantiation_dict": { + "id": "000000002" + }, + "intent": "Telll me the grand total of invoice 000000002.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "39.64" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$39.64" + }, + "intent_template_id": 274, + "old_task_id": 95 + }, + { + "sites": [ + "shopping" + ], + "task_id": 22, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Tell me the status of my latest order and when will it arrive", + "instantiation_dict": {}, + "intent": "Tell me the status of my latest order and when will it arrive", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "The last order was canceled. It will never arrive." + ] + }, + "reference_url": "", + "program_html": [], + "reference_answer_raw_annotation": "The last order was canceled. 
It will never arrive.", + "string_note": "" + }, + "intent_template_id": 193, + "old_task_id": 96 + }, + { + "sites": [ + "map", + "wikipedia" + ], + "task_id": 23, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the distance to drive from Carnegie Mellon University to the top computer science school in massachusetts", + "instantiation_dict": {}, + "intent": "Tell me the distance to drive from Carnegie Mellon University to the top computer science school in massachusetts", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "914" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "914 km" + }, + "intent_template_id": 120, + "old_task_id": 97 + }, + { + "sites": [ + "map" + ], + "task_id": 24, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Where is the nearest {{places}} to {{start}}, and what is the walking distance to it?", + "instantiation_dict": { + "places": "tea cafe", + "start": "University of Pittsburgh" + }, + "intent": "Where is the nearest tea cafe to University of Pittsburgh, and what is the walking distance to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Fuku Tea", + "3716", + "Forbes Avenue", + "Central Oakland", + "Pittsburgh", + "653m" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Fuku Tea, 3716, Forbes Avenue, Oakland, Central Oakland, Pittsburgh, Allegheny County, Pennsylvania, 15213, United States\n653m" + }, + "intent_template_id": 66, + "old_task_id": 98 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 25, + "require_login": true, + "storage_state": 
"./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "questions", + "repo": "kkroening/ffmpeg-python" + }, + "intent": "Display the list of issues in the kkroening/ffmpeg-python repository that have labels related to questions", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/kkroening/ffmpeg-python/-/issues/?sort=created_date&state=opened&label_name%5B%5D=question&first_page_size=20", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349, + "old_task_id": 103 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 26, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "from Jan to December 2022" + }, + "intent": "Presents the monthly count of successful orders from Jan to December 2022 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "01:11", + "02:16", + "03:14", + "04:8", + "05:8", + "06:13", + "07:9", + "08:8", + "09:10", + "10:4", + "11:5", + "12:10" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "01:11 02:16 03:14 04:8 05:8 06:13 07:9 08:8 09:10 10:4 11:5 12:10" + }, + "intent_template_id": 270, + "old_task_id": 109 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 27, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": 
"Show me the name of the customers who have expressed dissatisfaction with {{product}}", + "instantiation_dict": { + "product": "Chloe tank" + }, + "intent": "Show me the name of the customers who have expressed dissatisfaction with Chloe tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no negative review for Chloe tank", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 245, + "old_task_id": 115 + }, + { + "sites": [ + "shopping" + ], + "task_id": 28, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the date when I made my first purchase on this site?", + "instantiation_dict": {}, + "intent": "What is the date when I made my first purchase on this site?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3/2/22" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3/2/22" + }, + "intent_template_id": 161, + "old_task_id": 117 + }, + { + "sites": [ + "shopping" + ], + "task_id": 29, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I have jaw bruxism problem, show me something that could alleviate the problem.", + "instantiation_dict": {}, + "intent": "I have jaw bruxism problem, show me something that could alleviate the problem.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "", + "required_contents": { + "must_include": [ + "jaw bruxism", + "mouth guard" + ] + } + } + ] + }, + 
"intent_template_id": 151, + "old_task_id": 118 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 30, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Circe's products" + }, + "intent": "Tell me the reasons why customers like Circe's products", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Warm and comfortable. True to size." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Warm and comfortable. True to size." + }, + "intent_template_id": 250, + "old_task_id": 123 + }, + { + "sites": [ + "shopping" + ], + "task_id": 31, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the price range of {{product}} in the One Stop Market?", + "instantiation_dict": { + "product": "teeth grinding mouth guard" + }, + "intent": "What is the price range of teeth grinding mouth guard in the One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1.46", + "85" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$1.46 - $85" + }, + "intent_template_id": 159, + "old_task_id": 125 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 32, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What brands appear most frequently among the top search terms?", + "instantiation_dict": {}, + "intent": "What brands appear most frequently among the top 
search terms?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hollister" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hollister" + }, + "intent_template_id": 1001, + "old_task_id": 127 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 33, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What's the total number of items sold in the most recent {{k}} orders?", + "instantiation_dict": { + "k": "7" + }, + "intent": "What's the total number of items sold in the most recent 7 orders?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "25" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "25" + }, + "intent_template_id": 1002, + "old_task_id": 131 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 34, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "Eric and Kilian", + "repo": "a11yproject", + "date": "1/3/2023" + }, + "intent": "How many commits did Eric and Kilian make to a11yproject on 1/3/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 322, + "old_task_id": 135 + }, + { + "sites": [ + "map" + ], + "task_id": 35, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + 
"geolocation": null, + "intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?", + "instantiation_dict": { + "city1": "the hometown of Joe Biden", + "city2": "Bridgeport" + }, + "intent": "What is the estimated driving time between the hometown of Joe Biden and Bridgeport?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3h 20min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3h 20min" + }, + "intent_template_id": 51, + "old_task_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 36, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "food", + "time": "from mid Jan to the end Jan 2023" + }, + "intent": "How much I spent on food shopping during from mid Jan to the end Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 162, + "old_task_id": 144 + }, + { + "sites": [ + "shopping" + ], + "task_id": 37, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "color", + "product": "artifical plants", + "time": "Feb 2023" + }, + "intent": "What is the color configuration of the artifical plants I bought Feb 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + 
"Green-vines" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Green-vines" + }, + "intent_template_id": 155, + "old_task_id": 149 + }, + { + "sites": [ + "map" + ], + "task_id": 38, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "Animal Rescue League of Pittsburgh", + "location2": "Schenley park" + }, + "intent": "What is the minimum travel time by car from Animal Rescue League of Pittsburgh to Schenley park?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "9min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "9min" + }, + "intent_template_id": 36, + "old_task_id": 155 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 39, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Checkout merge requests assigned to me", + "instantiation_dict": {}, + "intent": "Checkout merge requests assigned to me", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/dashboard/merge_requests?assignee_username=byteblaze", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 290, + "old_task_id": 156 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 40, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Show all customers", + "instantiation_dict": {}, + "intent": "Show all customers", + "require_reset": false, + "eval": 
{ + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/customer/index/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 255, + "old_task_id": 157 + }, + { + "sites": [ + "shopping" + ], + "task_id": 41, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 40 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 40 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/game-card-holder-storage-case-for-nintendo-switch-games-or-ps-vita-game-case-or-sd-memory-cards-black.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 171, + "old_task_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 42, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/photosmart-plus-b209-clr-inkjetfb-p-s-c-usb-wrls-1.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? 
Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "The wireless connection works on a whim (about 40% of the time I've owned it)", + "It seems to constantly run out of ink", + "Cartridge prices are less than some printers I've had", + "This printer seems to have more reasons NOT to work (none that are findable or correctable) Ex: error boxes saying that it's out of paper when it automatically switches to photo printing for some reason", + "Scanner is as slow as my first scanner I ever owned in the mid-90's", + "For the $176 I paid, there isn't even a fax component on it. I guess the \"PLUS\" part of it's name is in reference to the migraines it causes when you can't figure out the new reason why it's not working for the 10th time in the past 2 months." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "The wireless connection works on a whim (about 40% of the time I've owned it). It seems to constantly run out of ink. Cartridge prices are less than some printers I've had, but now I understand why. This printer seems to have more reasons NOT to work (none that are findable or correctable) Ex: error boxes saying that it's out of paper when it automatically switches to photo printing for some reason. Scanner is as slow as my first scanner I ever owned in the mid-90's. For the $176 I paid, there isn't even a fax component on it. I guess the \"PLUS\" part of it's name is in reference to the migraines it causes when you can't figure out the new reason why it's not working for the 10th time in the past 2 months." 
+ }, + "intent_template_id": 136, + "old_task_id": 167 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 43, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "the most" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got the most stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "a11yproject.com", + "design" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "a11yproject.com, Primer/design" + }, + "intent_template_id": 289, + "old_task_id": 169 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 44, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "better" + }, + "intent": "Open my latest updated issue that has keyword \"better\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "fuzzy_match": [ + "No, it is open" + ] + }, + "reference_url": "http://localhost:28084/byteblaze/empathy-prompts/-/issues/8", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "", + "url_note": "GOLD in PRED" + }, + "intent_template_id": 310, + "old_task_id": 173 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 45, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": 
null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "homepage content" + }, + "intent": "Open my latest created issue that has homepage content in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/issues/719", + "program_html": [], + "reference_answer_raw_annotation": "closed", + "string_note": "" + }, + "intent_template_id": 500, + "old_task_id": 182 + }, + { + "sites": [ + "shopping" + ], + "task_id": 46, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "complete" + }, + "intent": "Tell me the total cost of my latest complete order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "65.32" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "65.32" + }, + "intent_template_id": 214, + "old_task_id": 190 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 47, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Compare the payment difference of the last {{N}} {{status_1}} orders and {{status_2}} orders", + "instantiation_dict": { + "status_1": "cancelled", + "status_2": "completed", + "N": "4" + }, + "intent": "Compare the payment difference of the last 4 cancelled orders and completed orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + 
"must_include": [ + "194.25" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "194.25" + }, + "intent_template_id": 367, + "old_task_id": 196 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 48, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "date", + "status": "most recent canlled" + }, + "intent": "Get the date of the most recent canlled order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "May 23 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "May 23, 2023" + }, + "intent_template_id": 366, + "old_task_id": 202 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 49, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make on {{date}}?", + "instantiation_dict": { + "user": "kilian", + "date": "3/5/2023" + }, + "intent": "How many commits did kilian make on 3/5/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 320, + "old_task_id": 205 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 50, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + 
"instantiation_dict": { + "PhoneNum": "8015551212" + }, + "intent": "Find the customer name and email with phone number 8015551212", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Sean Miller", + "sean.miller@gmail.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sean Miller, sean.miller@gmail.com" + }, + "intent_template_id": 364, + "old_task_id": 211 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 51, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Circe ice fleece" + }, + "intent": "What are the key aspects that the customers don't like about Circe ice fleece", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": ["N/A"] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 249, + "old_task_id": 215 + }, + { + "sites": [ + "map" + ], + "task_id": 52, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the walking distance from nearby hotels to {{location}} that take at most {{n}} minutes?", + "instantiation_dict": { + "location": "Gardner Steel Conference Center,", + "n": 5 + }, + "intent": "Show me the walking distance from nearby hotels to Gardner Steel Conference Center, that take at most 5 minutes?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Wyndham Pittsburgh University Cente: 375m", + "The Oaklander Hotel: 338m" + ] + }, + 
"reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Wyndham Pittsburgh University Cente: 375 m\nThe Oaklander Hotel: 338 m" + }, + "intent_template_id": 41, + "old_task_id": 220 + }, + { + "sites": [ + "map" + ], + "task_id": 53, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I am at CMU Pittsburgh, how long it takes to the nearest {{location}} with different transportation methods?", + "instantiation_dict": { + "location": "USPS postal office" + }, + "intent": "I am at CMU Pittsburgh, how long it takes to the nearest USPS postal office with different transportation methods?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Walk: 1 minute", + "Drive: less than 1 minute", + "Bike: less than 1 minute" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Walk: 1 minute to walk and\nDrive: less than 1 minute\nBike: less than 1 minute" + }, + "intent_template_id": 35, + "old_task_id": 221 + }, + { + "sites": [ + "shopping" + ], + "task_id": 54, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What do customers say about {{product_type}} from {{manufature}}", + "instantiation_dict": { + "product_type": "brush", + "manufature": "sephora" + }, + "intent": "What do customers say about brush from sephora", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "No reviews available for the sephora brushes." 
+ ] + }, + "reference_url": "", + "program_html": [], + "string_note": "The sephora brushes don't have reviews", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 135, + "old_task_id": 225 + }, + { + "sites": [ + "shopping" + ], + "task_id": 55, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "EYZUTAK" + }, + "intent": "What is the price range for products from EYZUTAK?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "9.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$9.99" + }, + "intent_template_id": 370, + "old_task_id": 227 + }, + { + "sites": [ + "shopping" + ], + "task_id": 56, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "under delivery" + }, + "intent": "Get the order number of my most recent under delivery order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "N/A" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "There is no under delivery order" + }, + "intent_template_id": 213, + "old_task_id": 235 + }, + { + "sites": [ + "map" + ], + "task_id": 57, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Where is the nearest {{location}} from {{location2}} {{condition}}", + "instantiation_dict": { + "location": "pharmacy", + "location2": "Carnegie 
Mellon", + "condition": "I can walk within 20mins" + }, + "intent": "Where is the nearest pharmacy from Carnegie Mellon I can walk within 20mins", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Schiller's Pharmacy", + "811", + "South Aiken Avenue", + "Shadyside", + "Pittsburgh" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Schiller's Pharmacy, 811, South Aiken Avenue, Shadyside, Pittsburgh, Allegheny County, 15232, United States" + }, + "intent_template_id": 39, + "old_task_id": 236 + }, + { + "sites": [ + "shopping" + ], + "task_id": 58, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "competative swimwear" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from competative swimwear category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/clothing-shoes-jewelry/sport-specific-clothing/competitive-swimwear.html?product_list_order=price&product_list_dir=desc", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 138, + "old_task_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 59, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "email address", + "product": "the style of Zoe products" + }, + "intent": 
"Show me the email address of the customer who is the most unhappy with the style of Zoe products", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "Valorie doesn't have a email in the system", + "program_html": [], + "string_note": "There is no negative review for Zoe products", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 244, + "old_task_id": 247 + }, + { + "sites": [ + "map" + ], + "task_id": 60, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "Apple Store near Pitt" + }, + "intent": "Tell me the coordinates of Apple Store near Pitt in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.451", + "-79.933" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.4511693, -79.9334241" + }, + "intent_template_id": 46, + "old_task_id": 250 + }, + { + "sites": [ + "map" + ], + "task_id": 61, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "Western Pennsylvania Hospital", + "information": "phone number" + }, + "intent": "What is the phone number of Western Pennsylvania Hospital", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "+1 412 578 5000" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4125785000" + }, + "intent_template_id": 501, + "old_task_id": 254 + }, + { + "sites": [ + "gitlab" + ], 
+ "task_id": 62, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "See all public projects", + "instantiation_dict": {}, + "intent": "See all public projects", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/explore", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 325, + "old_task_id": 258 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 63, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Get me my RSS feed token", + "instantiation_dict": {}, + "intent": "Get me my RSS feed token", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "TMN_bBn9Z48qVbUFZV45" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "TMN_bBn9Z48qVbUFZV45" + }, + "intent_template_id": 312, + "old_task_id": 259 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 64, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "What's the closest national park to {{city}}? How long does it take to bike there?", + "instantiation_dict": { + "city": "Vinalhaven, ME" + }, + "intent": "What's the closest national park to Vinalhaven, ME? 
How long does it take to bike there?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Acadia National Park" + ], + "fuzzy_match": [ + "10h 33min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Acadia National Park\n10h 33min" + }, + "intent_template_id": 85, + "old_task_id": 268 + }, + { + "sites": [ + "shopping" + ], + "task_id": 65, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "30", + "product_category": "men shoes" + }, + "intent": "Show me products under $30 in \"men shoes\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/clothing-shoes-jewelry/men/shoes.html?price=0-30", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 139, + "old_task_id": 270 + }, + { + "sites": [ + "shopping" + ], + "task_id": 66, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "switch accessories" + }, + "intent": "Search for \"switch accessories\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/catalogsearch/result/?q=switch+accessories", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 212, + "old_task_id": 276 + }, + { + "sites": [ + "shopping" + ], + "task_id": 67, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": 
"http://localhost:28082", + "geolocation": null, + "intent_template": "Look up the most recent models of XBox controllers released between 2020-2021?", + "instantiation_dict": {}, + "intent": "Look up the most recent models of XBox controllers released between 2020-2021?", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/microsoft-xbox-controller-carbon-black-for-series-x-series-s-xbox-one-windows-10-android-ios-bundled-with-dual-port-charging-dock-xbox-controller-skin-voucher-premgear-cloth.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 210, + "old_task_id": 283 + }, + { + "sites": [ + "shopping" + ], + "task_id": 68, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show the least expensive {{product}} with a minimum storage capacity of {{min_storage}}.", + "instantiation_dict": { + "product": "switch card holder", + "min_storage": "15 cards" + }, + "intent": "Show the least expensive switch card holder with a minimum storage capacity of 15 cards.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/game-card-holder-storage-case-for-nintendo-switch-games-or-ps-vita-game-case-or-sd-memory-cards-black.html", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 207, + "old_task_id": 285 + }, + { + "sites": [ + "map" + ], + "task_id": 69, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "How much time does it take from Pittsburgh to Philadelphia by car?", + "instantiation_dict": {}, + "intent": "How much time does it take from Pittsburgh to Philadelphia by car?", + "require_reset": false, + "eval": { + 
"eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "8h 33min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5h 47min" + }, + "intent_template_id": 47, + "old_task_id": 287 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 70, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "name" + }, + "intent": "Tell me the name of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Samantha Jones" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Samantha Jones" + }, + "intent_template_id": 234, + "old_task_id": 288 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 71, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "the best GAN python implementation" + }, + "intent": "Show me the command to clone the best GAN python implementation with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "ssh://git@localhost:2222/eriklindernoren/PyTorch-GAN.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "ssh://git@localhost:2222/eriklindernoren/PyTorch-GAN.git" + }, + "intent_template_id": 329, + "old_task_id": 296 + }, + { + "sites": [ + "shopping" + ], + "task_id": 72, + "require_login": true, + 
"storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "pending" + }, + "intent": "Show the most recent pending order", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/sales/order/view/order_id/189/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 180, + "old_task_id": 300 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 73, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "Pytorch GAN" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the Pytorch GAN project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Erik Linder-Norén" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Erik Linder-Norén" + }, + "intent_template_id": 323, + "old_task_id": 311 + }, + { + "sites": [ + "shopping" + ], + "task_id": 74, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Which number to call for the customer service?", + "instantiation_dict": {}, + "intent": "Which number to call for the customer service?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no phone number in the website", + 
"reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 134, + "old_task_id": 313 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 75, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "2019-nCov", + "attribute": "last names" + }, + "intent": "List the last names of the top 3 contributors to 2019-nCov repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lo", + "Chen", + "Chu" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lo, Chen, Chu" + }, + "intent_template_id": 324, + "old_task_id": 318 + }, + { + "sites": [ + "shopping" + ], + "task_id": 76, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}}, including shipping fee", + "instantiation_dict": { + "time": "2022" + }, + "intent": "How much refund I should expect from my order canlled in 2022, including shipping fee", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "3053.97" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3053.97" + }, + "intent_template_id": 160, + "old_task_id": 321 + }, + { + "sites": [ + "shopping" + ], + "task_id": 77, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + 
"instantiation_dict": { + "product": "chairs", + "sorting_order": "ascending price" + }, + "intent": "Show me the \"chairs\" listings by ascending price.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/catalogsearch/result/index/?product_list_order=price&q=chairs&product_list_dir=asc", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 208, + "old_task_id": 324 + }, + { + "sites": [ + "shopping" + ], + "task_id": 78, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "How much did I spend on shopping at One Stop Market {{time}}? They gave me a 20% discount on the total amount for orders exceeding $200 in cash", + "instantiation_dict": { + "time": "on November 2022" + }, + "intent": "How much did I spend on shopping at One Stop Market on November 2022? They gave me a 20% discount on the total amount for orders exceeding $200 in cash", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "359.546" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "359.546" + }, + "intent_template_id": 147, + "old_task_id": 333 + }, + { + "sites": [ + "shopping" + ], + "task_id": 79, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "body butter" + }, + "intent": "Tell me when I last ordered my body butter?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January 16th 2023" + ] + }, + "reference_url": "", + "program_html": [], + 
"string_note": "", + "reference_answer_raw_annotation": "January 16th 2023" + }, + "intent_template_id": 169, + "old_task_id": 335 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 80, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "in May 2023" + }, + "intent": "How many reviews our shop received in May 2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 248, + "old_task_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 81, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Who else have access to my repo {{repo}}, show me their usernames", + "instantiation_dict": { + "repo": "gimmiethat.space" + }, + "intent": "Who else have access to my repo gimmiethat.space, show me their usernames", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": "yjlou" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "yjlou" + }, + "intent_template_id": 298, + "old_task_id": 349 + }, + { + "sites": [ + "shopping" + ], + "task_id": 82, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "living room furtniture", + "order": "descending" + }, + "intent": "List products from living room 
furtniture category by descending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/home-kitchen/furniture/living-room-furniture.html?product_list_order=price&product_list_dir=desc", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 137, + "old_task_id": 354 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 83, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Checkout merge requests requiring my review", + "instantiation_dict": {}, + "intent": "Checkout merge requests requiring my review", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/dashboard/merge_requests?reviewer_username=byteblaze", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 291, + "old_task_id": 357 + }, + { + "sites": [ + "shopping" + ], + "task_id": 84, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "order statuses", + "order_number": "170 and 189" + }, + "intent": "Show me the order statuses for order number 170 and 189.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "170: cancelled", + "189: pending" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "170: cancelled, 189: pending" + }, + "intent_template_id": 206, + "old_task_id": 361 + }, + { + "sites": [ + "map" + ], + "task_id": 85, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + 
"geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "Carnegie Mellon University", + "location/address_2": "CVS (closet one)" + }, + "intent": "Measure distance between Carnegie Mellon University and CVS (closet one) by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "1.4km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1.4km" + }, + "intent_template_id": 58, + "old_task_id": 367 + }, + { + "sites": [ + "shopping" + ], + "task_id": 86, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "find discounted items.", + "instantiation_dict": {}, + "intent": "find discounted items.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no function to show only discount items", + "reference_answer_raw_annotation": "There is no function to show only discount items." 
+ }, + "intent_template_id": 188, + "old_task_id": 368 + }, + { + "sites": [ + "map" + ], + "task_id": 87, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "Carnegie Music Hall" + }, + "intent": "Pull up the description page of Carnegie Music Hall on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Carnegie Music Hall" + ] + } + } + ] + }, + "intent_template_id": 52, + "old_task_id": 369 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 88, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Preview the {{name}} theme for my shop", + "instantiation_dict": { + "name": "Magento Blank" + }, + "intent": "Preview the Magento Blank theme for my shop", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/admin/system_design_theme/edit/id/1", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 266, + "old_task_id": 374 + }, + { + "sites": [ + "shopping" + ], + "task_id": 89, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Summarize customer reviews for {{product}}.", + "instantiation_dict": { + "product": "Amazon Echo Dot 3rd generation" + }, + "intent": "Summarize customer reviews for Amazon Echo Dot 3rd generation.", + "require_reset": false, + "eval": { + "eval_types": [ + 
"string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Serena: It was the exact same one that came with my echo show that my puppy", + "Amazon Customer: It doesn’t work .", + "BlessedGirl: Great replacement for Echo Dot.", + "Jeff Sederquist: Great product no issues. It's for a Echo show 5", + "melvin m.: Very good charger but I had to buy this one because when I bought an echo show 5, I came with an echo show 10 charger in the package with the device", + "Jack Smith: Looks like pictures", + "Ignacio: Muy bueno de buena calidad", + "BusyB: My kids bent the original cord for their Echo show 5 and this replacement cord is just like the one it came with. It works just fine. Been using it for 2 weeks now and no problems.", + "Kee Williams: Great value works better than original", + "Howard A. Sinclair: Great product!!! Works exactly as it is supposed to." + ] + }, + "reference_url": "", + "program_html": [] + }, + "intent_template_id": 182, + "old_task_id": 376 + }, + { + "sites": [ + "map" + ], + "task_id": 90, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "Carnegie Music Hall", + "space": "hotel" + }, + "intent": "Find the hotel around Carnegie Music Hall", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "https://www.openstreetmap.org/search?query=hotels%20near%20Carnegie%20Music%20Hall", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59, + "old_task_id": 381 + }, + { + "sites": [ + "map" + ], + "task_id": 91, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I am arriving at Carnegie Mellon University. 
Find the nearby US Citizenship and Immigration Services and the walking distance to the nearest Social Security Administration from US Citizenship and Immigration Services", + "instantiation_dict": {}, + "intent": "I am arriving at Carnegie Mellon University. Find the nearby US Citizenship and Immigration Services and the walking distance to the nearest Social Security Administration from US Citizenship and Immigration Services", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "N/A" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no USCIS nearby", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 781, + "old_task_id": 382 + }, + { + "sites": [ + "map" + ], + "task_id": 92, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "I am arriving at Pittsburgh Airport. Show me the name of a Hyatt hotel if there is any nearby. Tell me the names of supermarkets that are within 15mins driving from the hotel", + "instantiation_dict": {}, + "intent": "I am arriving at Pittsburgh Airport. Show me the name of a Hyatt hotel if there is any nearby. 
Tell me the names of supermarkets that are within 15mins driving from the hotel", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hyatt Regency Pittsburgh International Airport", + "Giant Eagle", + "ALDI" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hyatt Regency Pittsburgh International Airport Giant Eagle, ALDI" + }, + "intent_template_id": 782, + "old_task_id": 383 + }, + { + "sites": [ + "shopping" + ], + "task_id": 93, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "List the customer names who complain about the quality of EYZUTAK phone cases", + "instantiation_dict": {}, + "intent": "List the customer names who complain about the quality of EYZUTAK phone cases", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lisa Lee", + "Evelyn Kurver", + "Amanda", + "N Randall" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lisa Lee, Evelyn Kurver, Amanda, N Randall" + }, + "intent_template_id": 666, + "old_task_id": 384 + }, + { + "sites": [ + "shopping" + ], + "task_id": 94, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "What is the rating of {{product}}", + "instantiation_dict": { + "product": "Ugreen lightning to 3.5mm cable" + }, + "intent": "What is the rating of Ugreen lightning to 3.5mm cable. 
Please round to the nearest whole number", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "65 |OR| 3" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Around 3.1 stars" + }, + "intent_template_id": 1355, + "old_task_id": 386 + }, + { + "sites": [ + "shopping" + ], + "task_id": 95, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Who gave {{stars}} for phone cases from EYZUTAK", + "instantiation_dict": { + "stars": "4 or 5 stars" + }, + "intent": "Who gave 4 or 5 stars for phone cases from EYZUTAK", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "MH", + "Misba009", + "Amanda", + "Amazon Customer", + "Cally", + "Bethany Robertson" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "MH, Misba009, Amanda, Amazon Customer, Cally, Bethany Robertson, Bethany Robertson" + }, + "intent_template_id": 1356, + "old_task_id": 387 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 96, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "color ulitity", + "repo": "a11yproject.com", + "content": "Good idea" + }, + "intent": "Post \"Good idea\" for the merge request related to color ulitity in a11yproject.com project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/merge_requests/1071", + "program_html": [ + { + "url": "last", + "locator": 
"document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "Good idea" + } + } + ] + }, + "intent_template_id": 348, + "old_task_id": 392 + }, + { + "sites": [ + "reddit" + ], + "task_id": 97, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "Seeking SDE positions" + }, + "intent": "Change my reddit bio to \"Seeking SDE positions\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/user/MarvelsGrantMan136", + "locator": "document.querySelector(\".user-bio__biography\").outerText", + "required_contents": { + "exact_match": "Seeking SDE positions" + } + } + ] + }, + "intent_template_id": 6, + "old_task_id": 401 + }, + { + "sites": [ + "reddit" + ], + "task_id": 98, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "books" + }, + "intent": "Upvote the newest post in books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/books/124260/adults-reading-to-each-other-out-loud", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 22, + "old_task_id": 404 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 99, + "require_login": true, + 
"storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Go to the merge request on {{topic}} I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "instantiation_dict": { + "topic": "verification functions" + }, + "intent": "Go to the merge request on verification functions I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/a11y-webring.club/-/merge_requests/40", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.querySelector('.timeline-discussion-body').outerText", + "required_contents": { + "exact_match": "@davepgreene" + } + } + ] + }, + "intent_template_id": 360, + "old_task_id": 415 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 100, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Enjoying life" + }, + "intent": "Set my gitlab status as Enjoying life.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze", + "locator": "document.querySelector('.cover-status').lastChild.textContent", + "required_contents": { + "exact_match": "Enjoying life" + } + } + ] + }, + "intent_template_id": 361, + "old_task_id": 419 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 101, + "require_login": true, + "storage_state": 
"./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Mark all {{brand}} shirts on sale", + "instantiation_dict": { + "brand": "Hollister" + }, + "intent": "Mark all Hollister shirts on sale", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/126/", + "locator": "document.querySelector('input[name=\"product[sale]\"]').value", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 237, + "old_task_id": 423 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 102, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the place in Pennsylvania where a plane crashed during the September 11th attacks" + }, + "intent": "Find the page of the place in Pennsylvania where a plane crashed during the September 11th attacks on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": { + "must_include": [ + "Somerset County" + ] + } + } + ] + }, + "intent_template_id": 371, + "old_task_id": 426 + }, + { + "sites": [ + "shopping" + ], + "task_id": 103, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/tall-pink-taper-candles-4-piece-orange-colored-tapered-candles-gradient-candles-10-6-inches-tall-tie-dye-candle-set-large-dripless-long-burning-candlesticks-two-color-taper-candles-candlesticks.html |AND| 
http://localhost:28082/spaas-white-taper-candles-4-pack-10-inch-tall-candles-scent-free-premium-wax-candle-sticks-8-hour-long-burning-white-candlesticks-for-home-decoration-wedding-holiday-and-parties.html |AND| http://localhost:28082/white-starfish-wall-candle-sconces-set-of-2-beach-decor-ocean-themed-wall-mount-candleholders-nautical-style-beach-bathroom-decor-coastal-farmhouse-seashell-candle-holders.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/checkout/cart", + "locator": "", + "required_contents": { + "must_include": [ + "SPAAS White Taper Candles - 4 Pack |OR| 10 Inch Tall Candles, Scent-Free Premium Wax Candle Sticks |OR| 8 Hour Long Burning White Candlesticks for Home Decoration, Wedding, Holiday and Parties" + ] + } + } + ] + }, + "intent_template_id": 145, + "old_task_id": 431 + }, + { + "sites": [ + "shopping" + ], + "task_id": 104, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?", + "instantiation_dict": { + "product": "a make up removal kit", + "time": "during summer 2022" + }, + "intent": "I previously ordered some a make up removal kit during summer 2022 and later cancelled. 
Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B0738JQG6Q" + ] + } + } + ] + }, + "intent_template_id": 156, + "old_task_id": 440 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 105, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "https://egg.tart.com" + }, + "intent": "set the homepage URL on my GitLab profile to https://egg.tart.com", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze", + "locator": "document.querySelector('.profile-header [itemprop=\"url\"]').outerText", + "required_contents": { + "exact_match": "egg.tart.com" + } + } + ] + }, + "intent_template_id": 331, + "old_task_id": 448 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 106, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "Ryker Tee Crew Neck" + }, + "intent": "Disable Ryker Tee Crew Neck from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/478/", + "locator": 
"document.querySelector('[name=\"product[status]\"').value", + "required_contents": { + "exact_match": "2" + } + } + ] + }, + "intent_template_id": 242, + "old_task_id": 454 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 107, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin/catalog/product/edit/id/1481/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "$5", + "action": "Reduce" + }, + "intent": "Reduce the price of this product by $5", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1481/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": { + "exact_match": "27.00" + } + } + ] + }, + "intent_template_id": 247, + "old_task_id": 458 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 108, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Update the product description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Antonia Racer Tank" + }, + "intent": "Update the product description of Antonia Racer Tank to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/../antonia-racer-tank.html", + "locator": "document.querySelector('.data.item.content').outerText + (document.querySelector('.product.attribute.overview [itemprop=\"description\"]')?.outerText || '')", + 
"required_contents": { + "must_include": [ + "This is in regular rotation at the gym", + "Its colorful and looks kinda cute under my exercise tanks", + "it's very stylish for yoga or something else low impact" + ] + } + } + ] + }, + "intent_template_id": 251, + "old_task_id": 464 + }, + { + "sites": [ + "shopping" + ], + "task_id": 109, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts" + }, + "intent": "Add 2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts" + ] + } + } + ] + }, + "intent_template_id": 186, + "old_task_id": 466 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 110, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "302" + }, + "intent": "Cancel order 302", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/302/", + "locator": "document.querySelector(\"#order_status\").outerText", + "required_contents": { + "exact_match": "Canceled" + } + } + ] + }, + "intent_template_id": 257, + "old_task_id": 470 + }, + { + "sites": [ + "gitlab" + ], + 
"task_id": 111, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "awesome_llm_reading" + }, + "intent": "Set up a new, empty repository with the name awesome_llm_reading?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/awesome_llm_reading", + "locator": "", + "required_contents": { + "must_include": [ + "awesome_llm_reading" + ] + } + } + ] + }, + "intent_template_id": 292, + "old_task_id": 476 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 112, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "Vinta" + }, + "intent": "Vinta wants to check my dotfile configurations. 
Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'vinta')", + "required_contents": { + "must_include": [ + "Guest" + ] + } + } + ] + }, + "intent_template_id": 294, + "old_task_id": 485 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 113, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "Home Page", + "heading": "This is the home page!! Leave here!!" + }, + "intent": "Change the page title of \"Home Page\" page on my site to \"This is the home page!! Leave here!!\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/cms/page/edit/page_id/2/", + "locator": "document.querySelector('input[name=\"title\"]').value", + "required_contents": { + "exact_match": "This is the home page!! Leave here!!" + } + } + ] + }, + "intent_template_id": 275, + "old_task_id": 488 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 114, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Sarah Miller", + "message": "the order is ready to be shipped soon!" 
+ }, + "intent": "Notify Sarah Miller in their most recent pending order with message \"the order is ready to be shipped soon!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "System message: We cannot add order history." + }, + "intent_template_id": 280, + "old_task_id": 491 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 115, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "24353446464", + "order": "307", + "service": "DHL" + }, + "intent": "Update order #307 with the DHL tracking number 24353446464", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/commentsHistory/order_id/307/active_tab/order_shipments/", + "locator": "", + "required_contents": { + "must_include": [ + "Tracking number 24353446464 for DHL assigned" + ] + } + } + ] + }, + "intent_template_id": 284, + "old_task_id": 497 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 116, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "Aeno capri" + }, + "intent": "Make all Aeno capri as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/1861/", + "locator": 
"document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": { + "exact_match": "0" + } + } + ] + }, + "intent_template_id": 287, + "old_task_id": 505 + }, + { + "sites": [ + "shopping" + ], + "task_id": 117, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "meat substitute", + "dollar_value": "between 100 and 200" + }, + "intent": "Buy the highest rated product from the meat substitute category within a budget between 100 and 200.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": { + "must_include": [ + "B01CTR3DLE" + ] + } + } + ] + }, + "intent_template_id": 172, + "old_task_id": 506 + }, + { + "sites": [ + "shopping" + ], + "task_id": 118, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Buy the best rating product from \"{{category}}\" category with at least 5 reviews and the product is least expensive", + "instantiation_dict": { + "category": "Men's shoe" + }, + "intent": "Buy the best rating product from \"Men's shoe\" category with at least 5 reviews and the product is least expensive", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + 
"required_contents": { + "must_include": [ + "B01J4MM3KO" + ] + } + } + ] + }, + "intent_template_id": 216, + "old_task_id": 509 + }, + { + "sites": [ + "shopping" + ], + "task_id": 119, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "white desk" + }, + "intent": "Add a white desk to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "white", + "desk" + ] + } + } + ] + }, + "intent_template_id": 189, + "old_task_id": 514 + }, + { + "sites": [ + "shopping" + ], + "task_id": 120, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082/elmwood-inn-fine-teas-orange-vanilla-caffeine-free-fruit-infusion-16-ounce-pouch.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": { + "must_include": [ + "Elmwood Inn Fine Teas, Orange Vanilla Caffeine-free Fruit Infusion, 16-Ounce Pouch" + ] + } + } + ] + }, + "intent_template_id": 196, + "old_task_id": 516 + }, + { + "sites": [ + "shopping" + ], + "task_id": 121, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + 
"intent_template": "Subscribe to the newsletter of OneStopMarket", + "instantiation_dict": {}, + "intent": "Subscribe to the newsletter of OneStopMarket", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/newsletter/manage/", + "locator": "document.querySelector('[title=\"General Subscription\"').checked.toString()", + "required_contents": { + "exact_match": "true" + } + } + ] + }, + "intent_template_id": 199, + "old_task_id": 521 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 122, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "eight" + }, + "intent": "Star the top eight most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/users/byteblaze/starred", + "locator": "", + "required_contents": { + "must_include": [ + "AndroidSlidingUpPanel", + "create-react-app", + "ffmpeg-python", + "PHP_XLSXWriter", + "AndroidAsync", + "Pytorch-GAN", + "administrate", + "keycloak" + ] + } + } + ] + }, + "intent_template_id": 354, + "old_task_id": 524 + }, + { + "sites": [ + "shopping" + ], + "task_id": 123, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. 
Don't submit yet", + "instantiation_dict": { + "product": "phone screen protector", + "time": "March 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the phone screen protector I bought March 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What’s on your mind?\"').value", + "required_contents": { + "fuzzy_match": [ + "refund", + "it broke after three days of use", + "000000180", + "12.99" + ] + } + } + ] + }, + "intent_template_id": 154, + "old_task_id": 528 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 124, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "Jakub Klinkovský", + "Koushik", + "Vinta Chen" + ] + }, + "intent": "Follow ['Jakub Klinkovský', 'Koushik', 'Vinta Chen'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@koush", + "@vinta" + ] + } + } + ] + }, + "intent_template_id": 330, + "old_task_id": 534 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 125, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { 
+ "order_id": "299", + "address": "456 Oak Avenue, New York, NY, 10001" + }, + "intent": "Modify the address of order #299 to 456 Oak Avenue, New York, NY, 10001", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/sales/order/view/order_id/299", + "locator": "", + "required_contents": { + "must_include": [ + "456 Oak Avenue", + "New York", + "10001" + ] + } + } + ] + }, + "intent_template_id": 240, + "old_task_id": 538 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 126, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Add a new {{option}} {{value}} to {{base_setting}} of {{product}}", + "instantiation_dict": { + "option": "color", + "value": "blue", + "base_setting": "size S and M", + "product": "Frankie Sweatshirt" + }, + "intent": "Add a new color blue to size S and M of Frankie Sweatshirt", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/110/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": { + "must_include": [ + "Sweatshirt-M-Blue", + "Sweatshirt-S-Blue" + ] + } + } + ] + }, + "intent_template_id": 252, + "old_task_id": 548 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 127, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "Do it myself", + "num": 8 + }, + "intent": 
"create a repository named Do it myself that includes a README file with the links to the most active 8 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/Do-it-myself/-/raw/main/README.md", + "locator": "", + "required_contents": { + "must_include": [ + "http://localhost:28080/f/DIY/118903/separate-glued-plastic-parts", + "http://localhost:28080/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess", + "http://localhost:28080/f/DIY/118935/basement-bulkhead-soffit-wall-framing", + "http://localhost:28080/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit", + "http://localhost:28080/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches", + "http://localhost:28080/f/DIY/118931/afci-outlet-question", + "http://localhost:28080/f/DIY/118824/teflon-tape-to-attach-washing-machine-drain-hose-to-pipe", + "http://localhost:28080/f/DIY/118866/paver-base-for-shed" + ] + } + } + ] + }, + "intent_template_id": 88, + "old_task_id": 566 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 128, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo", + "instantiation_dict": { + "collaborator_account_list": "Jakub Klinkovský and Benoît Blanchon", + "repo": "gimmiethat.space" + }, + "intent": "Invite Jakub Klinkovský and Benoît Blanchon as collaborator to gimmiethat.space repo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/gimmiethat.space/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@lahwaacz", + "@bblanchon" + ] + } + } + ] + }, 
+ "intent_template_id": 293, + "old_task_id": 567 + }, + { + "sites": [ + "shopping" + ], + "task_id": 129, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "111 Magnolia Path, Atlanta, GA, 30303" + }, + "intent": "I recently moved, my address is 111 Magnolia Path, Atlanta, GA, 30303, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28082/customer/address", + "locator": "document.querySelector(\".box.box-address-shipping > .box-content\").outerText", + "required_contents": { + "must_include": [ + "111 Magnolia Path", + "Atlanta, Georgia, 30303" + ] + } + }, + { + "url": "http://localhost:28082/customer/address", + "locator": "document.querySelector(\".box.box-address-billing > .box-content\").outerText", + "required_contents": { + "must_include": [ + "111 Magnolia Path", + "Atlanta, Georgia, 30303" + ] + } + } + ] + }, + "intent_template_id": 165, + "old_task_id": 574 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 130, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Add the following users to my {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "GitHub timeline item management extension", + "role": "maintainer", + "user_list": [ + "abisubramanya27", + "lahwaacz" + ] + }, + "intent": "Add the following users to my GitHub timeline item management extension as maintainer: ['abisubramanya27', 'lahwaacz']", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + 
"reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/remove-board-movement-events-from-the-github-issue-timeline/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'abisubramanya27')", + "required_contents": { + "must_include": [ + "Maintainer" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/remove-board-movement-events-from-the-github-issue-timeline/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'lahwaacz')", + "required_contents": { + "must_include": [ + "Maintainer" + ] + } + } + ] + }, + "intent_template_id": 351, + "old_task_id": 577 + }, + { + "sites": [ + "reddit" + ], + "task_id": 131, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "Cyberpunk", + "description": "Welcome to the future", + "sidebar_list": [ + "Games", + "Books", + "Movies", + "Future" + ] + }, + "intent": "Create a new forum named Cyberpunk, with a description of Welcome to the future, and include ['Games', 'Books', 'Movies', 'Future'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/Cyberpunk/edit", + "locator": "document.querySelector(\"#forum_description\").value", + "required_contents": { + "must_include": [ + "Welcome to the future" + ] + } + }, + { + "url": "http://localhost:28080/f/Cyberpunk/edit", + "locator": "document.querySelector(\"#forum_sidebar\").value", + "required_contents": { + "must_include": [ + "Games", + "Books", + "Movies", + "Future" + ] + } + } + ] + }, + "intent_template_id": 7, + "old_task_id": 582 + }, + { + 
"sites": [ + "reddit" + ], + "task_id": 132, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "machine learning" + }, + "intent": "Open the thread of a trending post on the forum \"machine learning\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/MachineLearning", + "program_html": [ + { + "url": "http://localhost:28080/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": { + "must_include": [ + "MachineLearning" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4, + "old_task_id": 599 + }, + { + "sites": [ + "reddit" + ], + "task_id": 133, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "is car necessary in NYC" + }, + "intent": "Post my question, \"is car necessary in NYC\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/nyc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "is car necessary in NYC" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765, + "old_task_id": 601 + }, + { + "sites": [ + "reddit" + ], + "task_id": 134, + "require_login": true, + 
"storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "gaming consoles", + "question": "what is the recommended console to buy these days" + }, + "intent": "Find a subreddit focused on topics related to gaming consoles, and post my question, \"what is the recommended console to buy these days\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/consoles", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "what is the recommended console to buy these days" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5, + "old_task_id": 605 + }, + { + "sites": [ + "reddit" + ], + "task_id": 135, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "big little lies", + "content": "can't stop it" + }, + "intent": "Post a review of my recent reading \"big little lies\" in the r/books with my comment \"can't stop it\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "big little lies", + "can't stop it" + ] + } + } + ], + 
"url_note": "GOLD in PRED" + }, + "intent_template_id": 9, + "old_task_id": 612 + }, + { + "sites": [ + "reddit" + ], + "task_id": 136, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "costume contest", + "subreddit": "funny" + }, + "intent": "Re-post the image of costume contest in this page to funny subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/funny", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "from /f/pics" + ] + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "[...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "bd8bc5f4c846aac4df08626faa3a34a7d47c8f3bdd92bf615a54afd939f063a7.jpg" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11, + "old_task_id": 619 + }, + { + "sites": [ + "reddit" + ], + "task_id": 137, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "Iphone 14" + }, + "intent": "Create a discussion post about \"Iphone 14\" in a relevant subreddit and ask users for their opinions with the simple 
prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/iphone", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "your opinion", + "Iphone 14" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13, + "old_task_id": 626 + }, + { + "sites": [ + "reddit" + ], + "task_id": 138, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "running shoes", + "price": "$100", + "subreddit": "r/sports" + }, + "intent": "Ask for product recommendations for running shoes within a budget of $100 in r/sports", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/sports", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "100", + "running", + "shoes" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15, + "old_task_id": 631 + }, + { + "sites": [ + "reddit" + ], + "task_id": 139, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "Harry Potter", + "date": "July 8th", + "subreddit": "books" + }, + "intent": "Post a 
notice on a virtual meetup for Harry Potter enthusiasts on July 8th in the books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "fuzzy_match": [ + "Harry Potter", + "July 8th", + "virtual meetup" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16, + "old_task_id": 641 + }, + { + "sites": [ + "reddit" + ], + "task_id": 140, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Post in {{subreddit}} subreddit about what could machine learning help the correpong field.", + "instantiation_dict": { + "subreddit": "books" + }, + "intent": "Post in books subreddit about what could machine learning help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "fuzzy_match": [ + "machine learning", + "help" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19, + "old_task_id": 645 + }, + { + "sites": [ + "reddit" + ], + "task_id": 141, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080/f/technology/134852/ai-experts-disown-musk-backed-campaign-citing-their-research", + "geolocation": null, + "intent_template": "Reply to {{position_description}} with my comment \"{{content_description}}\"", + "instantiation_dict": { + "position_description": 
"the post", + "content_description": "???" + }, + "intent": "Reply to the post with my comment \"???\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/technology/134852/ai-experts-disown-musk-backed-campaign-citing-their-research", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.comment__body').outerText", + "required_contents": { + "exact_match": "???" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 23, + "old_task_id": 652 + }, + { + "sites": [ + "shopping" + ], + "task_id": 142, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "speaker", + "order_id": "148" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the speaker I bought, stating that it broke after just three days of use. Also, ensure to include the order number #148 and the product SKU. 
Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/contact", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What’s on your mind?\"').value", + "required_contents": { + "fuzzy_match": [ + "refund", + "broke after three days of use", + "148", + "B003FVW3VA" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 153, + "old_task_id": 657 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 143, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "a11yproject.com/redesign", + "target_branch": "master", + "reviewer": "Roshan Jossy" + }, + "intent": "Submit a merge request for a11yproject.com/redesign branch to be merged into master branch, assign Roshan Jossy as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": { + "exact_match": "redesign" + } + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": { + "exact_match": "main" + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": { + "must_include": [ + "Roshan Jossy" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + 
"intent_template_id": 335, + "old_task_id": 668 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 144, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "Racing Wheel Overdrive for Xbox X", + "rating": "1 star" + }, + "intent": "Gather the titles of Racing Wheel Overdrive for Xbox X reviews with 1 star rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Racing Wheel Overdrive for Xbox X\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__title').outerText", + "required_contents": { + "exact_match": "real user feedback on Racing Wheel Overdrive for Xbox X" + } + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "must_include": [ + "Unable to set neutral steering", + "Doesn’t work with PC", + "Crazy problems in automatic mode", + "pedals stopped working", + "Only works with certain games" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101, + "old_task_id": 673 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 145, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "canceled" + }, + "intent": "Lookup 
orders that are canceled", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": { + "must_include": [ + "Canceled" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253, + "old_task_id": 678 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 146, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Find a GitLab repository related to {{topic}} and make a Reddit post linking to it in a relevant subreddit", + "instantiation_dict": { + "topic": "chatGPT" + }, + "intent": "Find a GitLab repository related to chatGPT and make a Reddit post linking to it in a relevant subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/machinelearning |OR| http://localhost:28080/f/singularity |OR| http://localhost:28080/f/deeplearning |OR| http://localhost:28080/f/technology", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": { + "must_include": [ + "chatgpt", + "http://localhost:28084/convexegg/chatgpt" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 116, + "old_task_id": 682 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 147, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + 
"repo": "auth0/angular-storage", + "subreddit": "technology" + }, + "intent": "Promote auth0/angular-storage to subreddit technology with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28080/f/technology", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText + [...document.querySelector('.submission__inner').querySelectorAll('[href],[src]')].map(elem => elem.getAttribute('href') || elem.getAttribute('src')).join(' ')", + "required_contents": { + "must_include": [ + "A storage library for AngularJS done right", + "http://localhost:28084/auth0/angular-storage" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117, + "old_task_id": 686 + }, + { + "sites": [ + "shopping" + ], + "task_id": 148, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "my refund is suppoed to be replaced by a coupon" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as my refund is suppoed to be replaced by a coupon", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "http://localhost:28082/contact/index/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 163, + "old_task_id": 693 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 149, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + 
"instantiation_dict": { + "report": "sales order report", + "time_span": "for last month" + }, + "intent": "Today is 3/15/2023, generate a sales order report for last month", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/reports/report_sales/sales", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "2/1/23" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "2/28/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268, + "old_task_id": 704 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 150, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "shipping", + "start_date": "08/05/2022", + "end_date": "03/01/2023" + }, + "intent": "Create a shipping report from 08/05/2022 to 03/01/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28083/admin/reports/report_sales/shipping", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": { + "exact_match": "8/5/22" + } + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": { + "exact_match": "3/1/23" + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271, + "old_task_id": 710 + }, + { + "sites": [ + "reddit" + ], + "task_id": 151, + "require_login": true, + "storage_state": 
"./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "subreddit": "gadgets", + "k": "1" + }, + "intent": "Thumbs down the top 1 post ever in gadgets.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/gadgets/19459/a-custom-gaming-pc-built-inside-a-vintage-1940s-motorola", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 24, + "old_task_id": 714 + }, + { + "sites": [ + "reddit" + ], + "task_id": 152, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "subreddit": "earthporn", + "user": "CameronKelsey" + }, + "intent": "Like all submissions created by CameronKelsey in subreddit earthporn", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/EarthPorn/98332/my-favorite-place-on-the-planet-henry-s-fork-of-the-snake", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/EarthPorn/98297/2-years-later-this-is-still-one-of-the-most-incredible", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote 
vote--user-upvoted" + ] + } + }, + { + "url": "http://localhost:28080/f/EarthPorn/98256/i-can-t-wait-for-all-this-green-to-start-coming-back-little", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-upvoted" + ] + } + } + ] + }, + "intent_template_id": 25, + "old_task_id": 720 + }, + { + "sites": [ + "reddit" + ], + "task_id": 153, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "AdamCannon", + "subreddit": "UpliftingNews" + }, + "intent": "DisLike all submissions created by AdamCannon in subreddit UpliftingNews", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/UpliftingNews/16087/same-sex-marriage-is-now-legal-in-all-of-mexico-s-states", + "locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')", + "required_contents": { + "must_include": [ + "vote vote--user-downvoted" + ] + } + } + ] + }, + "intent_template_id": 1510, + "old_task_id": 729 + }, + { + "sites": [ + "reddit" + ], + "task_id": 154, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://localhost:28080", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "Star Trek Starfleet Academy series", + "content": "Every watch makes me feel like a kid again" + }, + "intent": "Edit my post on Star Trek Starfleet Academy series by adding a line to the body that says \"Every watch makes me feel like a kid again\"", + "require_reset": false, + 
"eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28080/f/television/135201/star-trek-starfleet-academy-series-from-alex-kurtzman-and", + "locator": "document.querySelector('.submission__body').outerText", + "required_contents": { + "exact_match": "Every watch makes me feel like a kid again" + } + } + ] + }, + "intent_template_id": 27, + "old_task_id": 733 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 155, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "Boston home NBA team", + "time": "" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of Boston home NBA team ", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "TD Garden", + "Boston", + "Massachusetts" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 94, + "old_task_id": 741 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 156, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create a new {{scope}} project 
\"awesome-llms\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "public", + "account_list": "primer, convexegg, abishek" + }, + "intent": "Create a new public project \"awesome-llms\" and add primer, convexegg, abishek as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/awesome-llms", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "public" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/awesome-llms/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@primer", + "@convexegg", + "@abisubramanya27" + ] + } + } + ] + }, + "intent_template_id": 332, + "old_task_id": 745 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 157, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "web_agent_android_xl", + "template": "Android", + "account_list": "primer, convexegg, abishek" + }, + "intent": "Start a private project web_agent_android_xl with Android template and add primer, convexegg, abishek as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28084/byteblaze/web_agent_android_xl", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": { + "must_include": [ + "Private" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/web_agent_android_xl/-/commits", + "locator": "", + "required_contents": { + "must_include": [ + 
"Initialized from 'Android' project template" + ] + } + }, + { + "url": "http://localhost:28084/byteblaze/web_agent_android_xl/-/project_members", + "locator": "", + "required_contents": { + "must_include": [ + "@primer", + "@convexegg", + "@abisubramanya27" + ] + } + } + ] + }, + "intent_template_id": 2100, + "old_task_id": 748 + }, + { + "sites": [ + "map", + "shopping_admin" + ], + "task_id": 158, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Show me the route and driving time from {{city1}} to {{city2}}", + "instantiation_dict": { + "city1": "Allentown, PA", + "city2": "the city where my E-commerce customer Amanda Kim lives" + }, + "intent": "Show me the route and driving time from Allentown, PA to the city where my E-commerce customer Amanda Kim lives", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Allentown" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Hoboken", + "New Jersey" + ] + } + } + ] + }, + "intent_template_id": 42, + "old_task_id": 760 + }, + { + "sites": [ + "map" + ], + "task_id": 159, + "require_login": true, + "storage_state": null, + "start_url": "https://www.openstreetmap.org", + "geolocation": null, + "intent_template": "Get directions from {{location/address_1}} to {{location/address_2}} using {{transportation}} options.", + "instantiation_dict": { + "location/address_1": "Carnegie Music Hall in NYC", + "location/address_2": "Carnegie Mellon 
University", + "transportation": "driving" + }, + "intent": "Get directions from Carnegie Music Hall in NYC to Carnegie Mellon University using driving options.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": { + "exact_match": "1" + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": { + "must_include": [ + "Carnegie Hall", + "West 57th Street", + "Manhattan", + "New York" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": { + "must_include": [ + "Carnegie Mellon University", + "Pittsburgh" + ] + } + } + ] + }, + "intent_template_id": 54, + "old_task_id": 762 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 160, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "http://localhost:28083/admin", + "geolocation": null, + "intent_template": "{{quantity}} {{product}} arrived, update the stock", + "instantiation_dict": { + "quantity": "5", + "product": "blue Cronus yoga pants with size 33" + }, + "intent": "5 blue Cronus yoga pants with size 33 arrived, update the stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/872/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": { + "exact_match": "5" + } + }, + { + "url": "http://localhost:28083/admin/catalog/product/edit/id/872/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": 
{ + "exact_match": "1" + } + } + ] + }, + "intent_template_id": 241, + "old_task_id": 768 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 161, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Make a folder named {{directory}} on the {{gitlab_repo}} repo and include a file called urls.txt that consists of the links to the 5 most recent posts from {{subreddit}}.", + "instantiation_dict": { + "directory": "car", + "subreddit": "cars", + "gitlab_repo": "gimmiethat.space" + }, + "intent": "Make a folder named car on the gimmiethat.space repo and include a file called urls.txt that consists of the links to the 5 most recent posts from cars.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "the subreddit does not exist", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 84, + "old_task_id": 791 + }, + { + "sites": [ + "shopping" + ], + "task_id": 162, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "http://localhost:28082", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "77 Massachusetts Ave, Cambridge, MA" + }, + "intent": "Change the delivery address for my most recent order to 77 Massachusetts Ave, Cambridge, MA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "N/A" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191, + "old_task_id": 798 + }, + { + "sites": [ + "gitlab" + ], 
+ "task_id": 163, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "a11yproject", + "issue": "404 for many URLs", + "account": "myself", + "due": "2030-1-3" + }, + "intent": "Create an issue in a11yproject repo with title \"404 for many URLs\". Assign the issue to myself. Set due date to be 2030-1-3", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "http://localhost:28084/a11yproject/a11yproject.com/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[data-qa-selector=\"title_content\"]').outerText", + "required_contents": { + "exact_match": "404 for many URLs" + } + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": { + "must_include": [ + "Jan 3, 2030" + ] + } + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": { + "must_include": [ + "Byte Blaze" + ] + } + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327, + "old_task_id": 809 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 164, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "http://localhost:28084", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} in {{repo}} to {{account}}.", + "instantiation_dict": { + "repo": "a11yproject", + "issue": 404, + "account": "myself" + }, + "intent": "Assign the issue regarding 404 in a11yproject to myself.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + 
"program_html": [ + { + "url": "http://localhost:28084/dashboard/issues?scope=all&state=opened&assignee_username=byteblaze", + "locator": "", + "required_contents": { + "must_include": [ + "404s, bad host, timeouts, bad urls for URLs linked from website" + ] + } + } + ] + }, + "intent_template_id": 999, + "old_task_id": 811 + } +] \ No newline at end of file diff --git a/VAB-WebArena-Lite/new/test_webarena_lite.raw.json b/VAB-WebArena-Lite/config_files/wa/test_webarena_lite.raw.json similarity index 100% rename from VAB-WebArena-Lite/new/test_webarena_lite.raw.json rename to VAB-WebArena-Lite/config_files/wa/test_webarena_lite.raw.json diff --git a/VAB-WebArena-Lite/debug_info/all_element.json b/VAB-WebArena-Lite/debug_info/all_element.json new file mode 100644 index 0000000..e69de29 diff --git a/VAB-WebArena-Lite/debug_info/marked.png b/VAB-WebArena-Lite/debug_info/marked.png new file mode 100644 index 0000000..eec9c77 Binary files /dev/null and b/VAB-WebArena-Lite/debug_info/marked.png differ diff --git a/VAB-WebArena-Lite/debug_info/parsed.html b/VAB-WebArena-Lite/debug_info/parsed.html new file mode 100644 index 0000000..8b08201 --- /dev/null +++ b/VAB-WebArena-Lite/debug_info/parsed.html @@ -0,0 +1 @@ +
Submitted by _sshin_ t3_10zmz2d MachineLearning
76 comments
Submitted by radi-cho t3_11izjc1 MachineLearning
49 comments
782
Submitted by perception-eng t3_zubg2u MachineLearning
43 comments
756
Submitted by rumovoice t3_11hscl1 MachineLearning
60 comments
742
[D] How frustrating are the ML interviews these days!!! TOP 3% interview joke Submitted by Mogady t3_y7708w MachineLearning
176 comments
725
[News] Twitter algorithm now open source Submitted by John-The-Bomb-2 t3_127wy7i MachineLearning
49 comments
703
Submitted by LegendOfHiddnTempl t3_1169uzy MachineLearning
23 comments
656
\ No newline at end of file diff --git a/VAB-WebArena-Lite/debug_info/raw.html b/VAB-WebArena-Lite/debug_info/raw.html new file mode 100644 index 0000000..4848d0d --- /dev/null +++ b/VAB-WebArena-Lite/debug_info/raw.html @@ -0,0 +1,1865 @@ + + + Postmill + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/VAB-WebArena-Lite/debug_info/screenshot_raw.png b/VAB-WebArena-Lite/debug_info/screenshot_raw.png new file mode 100644 index 0000000..9ed4ba6 Binary files /dev/null and b/VAB-WebArena-Lite/debug_info/screenshot_raw.png differ diff --git a/VAB-WebArena-Lite/environment_docker/README.md b/VAB-WebArena-Lite/environment_docker/README.md new file mode 100644 index 0000000..1e73f1d --- /dev/null +++ b/VAB-WebArena-Lite/environment_docker/README.md @@ -0,0 +1,100 @@ +# Docker for WebArena Websites +This README file hosts the instructions for our Docker images and a quick start guide for starting up websites used in VisualWebArena. + +## Classifieds Website + +Download the image zip from one of the following: +- https://drive.google.com/file/d/1m79lp84yXfqdTBHr6IS7_1KkL4sDSemR/view +- https://archive.org/download/classifieds_docker_compose + +``` +unzip classifieds_docker_compose.zip +cd classifieds_docker_compose +vi classifieds_docker_compose/docker-compose.yml # Set CLASSIFIEDS to your site url `http://<your-server-hostname>:9980/`, and change the reset token if required +docker compose up --build -d +# Wait for compose up to finish. This may take a while on the first launch as it downloads several large images from dockerhub. +docker exec classifieds_db mysql -u root -ppassword osclass -e 'source docker-entrypoint-initdb.d/osclass_craigslist.sql' # Populate DB with content +``` +Now you can visit `http://<your-server-hostname>:9980`. + + +## Shopping Website (OneStopShop) + +The Shopping Website follows the same setup procedure as the environment used in WebArena. 
Download the image tar from the following mirrors: +- https://drive.google.com/file/d/1gxXalk9O0p9eu1YkIJcmZta1nvvyAJpA/view?usp=sharing +- https://archive.org/download/webarena-env-shopping-image +- http://metis.lti.cs.cmu.edu/webarena-images/shopping_final_0712.tar + +``` +docker load --input shopping_final_0712.tar +docker run --name shopping -p 7770:80 -d shopping_final_0712 +# wait ~1 min for all services to start + +docker exec shopping /var/www/magento2/bin/magento setup:store-config:set --base-url="http://<your-server-hostname>:7770" # no trailing slash +docker exec shopping mysql -u magentouser -pMyPassword magentodb -e 'UPDATE core_config_data SET value="http://<your-server-hostname>:7770/" WHERE path = "web/secure/base_url";' +docker exec shopping /var/www/magento2/bin/magento cache:flush + +# Disable re-indexing of products +docker exec shopping /var/www/magento2/bin/magento indexer:set-mode schedule catalogrule_product +docker exec shopping /var/www/magento2/bin/magento indexer:set-mode schedule catalogrule_rule +docker exec shopping /var/www/magento2/bin/magento indexer:set-mode schedule catalogsearch_fulltext +docker exec shopping /var/www/magento2/bin/magento indexer:set-mode schedule catalog_category_product +docker exec shopping /var/www/magento2/bin/magento indexer:set-mode schedule customer_grid +docker exec shopping /var/www/magento2/bin/magento indexer:set-mode schedule design_config_grid +docker exec shopping /var/www/magento2/bin/magento indexer:set-mode schedule inventory +docker exec shopping /var/www/magento2/bin/magento indexer:set-mode schedule catalog_product_category +docker exec shopping /var/www/magento2/bin/magento indexer:set-mode schedule catalog_product_attribute +docker exec shopping /var/www/magento2/bin/magento indexer:set-mode schedule catalog_product_price +docker exec shopping /var/www/magento2/bin/magento indexer:set-mode schedule cataloginventory_stock +``` +Now you can visit `http://<your-server-hostname>:7770`. 
+ + +## Social Forum Website (Reddit) + +The Social Forum Website follows the same setup procedure as the environment used in WebArena. Download the image tar from the following mirrors: +- https://drive.google.com/file/d/17Qpp1iu_mPqzgO_73Z9BnFjHrzmX9DGf/view?usp=sharing +- https://archive.org/download/postmill-populated-exposed-withimg +- http://metis.lti.cs.cmu.edu/webarena-images/postmill-populated-exposed-withimg.tar + +``` +docker load --input postmill-populated-exposed-withimg.tar +docker run --name forum -p 9999:80 -d postmill-populated-exposed-withimg +``` +Now you can visit `http://<your-server-hostname>:9999/`. + + +## Wikipedia Website + +The Wikipedia Website follows the same setup procedure as the environment used in WebArena. Download the data from the following mirrors: +- https://drive.google.com/file/d/1Um4QLxi_bGv5bP6kt83Ke0lNjuV9Tm0P/view?usp=sharing +- https://archive.org/download/webarena-env-wiki-image +- http://metis.lti.cs.cmu.edu/webarena-images/wikipedia_en_all_maxi_2022-05.zim + +``` +docker run -d --name=wikipedia --volume=<your-path-to-downloaded-folder>/:/data -p 8888:80 ghcr.io/kiwix/kiwix-serve:3.3.0 wikipedia_en_all_maxi_2022-05.zim +``` +Now you can visit `http://<your-server-hostname>:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing`. + + +## Homepage + +The homepage lists all available websites which the agent can use to navigate to different sites. +![Homepage](../media/homepage_demo.png) + +To host the homepage, first change `<your-server-hostname>` to the corresponding server hostnames in [webarena-homepage/templates/index.html](webarena-homepage/templates/index.html) +```bash +# Define your actual server hostname +YOUR_ACTUAL_HOSTNAME="" +# Remove trailing / if it exists +YOUR_ACTUAL_HOSTNAME=${YOUR_ACTUAL_HOSTNAME%/} +# Use perl to replace the placeholder in the HTML file +perl -pi -e "s|<your-server-hostname>|${YOUR_ACTUAL_HOSTNAME}|g" webarena-homepage/templates/index.html +``` + +Then run +``` +cd webarena-homepage +flask run --host=0.0.0.0 --port=4399 +``` +The homepage will be available at `http://<your-server-hostname>:4399`. 
diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/app.py b/VAB-WebArena-Lite/environment_docker/webarena-homepage/app.py new file mode 100644 index 0000000..0b092c7 --- /dev/null +++ b/VAB-WebArena-Lite/environment_docker/webarena-homepage/app.py @@ -0,0 +1,27 @@ +from flask import Flask, render_template + +app = Flask(__name__) + + +@app.route("/") +def index() -> str: + return render_template("index.html") + + +@app.route("/scratchpad.html") +def scratchpad() -> str: + return render_template("scratchpad.html") + + +@app.route("/calculator.html") +def calculator() -> str: + return render_template("calculator.html") + + +@app.route("/password.html") +def password() -> str: + return render_template("password.html") + + +if __name__ == "__main__": + app.run(host="0.0.0.0", port=4399) diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/calculator.png b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/calculator.png new file mode 100644 index 0000000..53b7013 Binary files /dev/null and b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/calculator.png differ diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/classifieds.png b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/classifieds.png new file mode 100644 index 0000000..66c988e Binary files /dev/null and b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/classifieds.png differ diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/cms.png b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/cms.png new file mode 100644 index 0000000..7ea5b53 Binary files /dev/null and b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/cms.png differ diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/gitlab.png 
b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/gitlab.png new file mode 100644 index 0000000..a9c4af3 Binary files /dev/null and b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/gitlab.png differ diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/manual1.png b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/manual1.png new file mode 100644 index 0000000..0416212 Binary files /dev/null and b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/manual1.png differ diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/manual2.png b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/manual2.png new file mode 100644 index 0000000..be6c779 Binary files /dev/null and b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/manual2.png differ diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/map.png b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/map.png new file mode 100644 index 0000000..6718f51 Binary files /dev/null and b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/map.png differ diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/onestopshop.png b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/onestopshop.png new file mode 100644 index 0000000..2669443 Binary files /dev/null and b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/onestopshop.png differ diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/password.png b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/password.png new file mode 100644 index 0000000..8916513 Binary files /dev/null and b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/password.png differ diff --git 
a/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/reddit.png b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/reddit.png new file mode 100644 index 0000000..796b006 Binary files /dev/null and b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/reddit.png differ diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/scratchpad.png b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/scratchpad.png new file mode 100644 index 0000000..4afea7f Binary files /dev/null and b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/scratchpad.png differ diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/wikipedia.png b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/wikipedia.png new file mode 100644 index 0000000..aa46959 Binary files /dev/null and b/VAB-WebArena-Lite/environment_docker/webarena-homepage/static/figures/wikipedia.png differ diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/templates/calculator.html b/VAB-WebArena-Lite/environment_docker/webarena-homepage/templates/calculator.html new file mode 100644 index 0000000..6445298 --- /dev/null +++ b/VAB-WebArena-Lite/environment_docker/webarena-homepage/templates/calculator.html @@ -0,0 +1,109 @@ + + + + Calculator + + + +
+

Calculator

+

Enter the expression and get the results

+ + + +
Result:
+
+ + + + diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/templates/index.html b/VAB-WebArena-Lite/environment_docker/webarena-homepage/templates/index.html new file mode 100644 index 0000000..29bb08a --- /dev/null +++ b/VAB-WebArena-Lite/environment_docker/webarena-homepage/templates/index.html @@ -0,0 +1,127 @@ + + + + Homepage + + + + +
+
+ + +
+ Logo for Classifieds + +

Classifieds

+
+

A classifieds website for people to sell and buy things

+
+ +
+ Logo for OneStopShop + +

OneStopShop

+
+

An online shopping site

+
+ +
+ Logo for Reddit + +

Reddit

+
+

A social news aggregation and discussion website

+
+ +
+ Logo for Calculator + +

Calculator

+
+

A calculator

+
+ +
+ Logo for Scratchpad + +

Scratchpad

+
+

A scratchpad for taking notes

+
+ +
+ Logo for Wikipedia + +

Wikipedia

+
+

An online encyclopedia

+
+ + + +
+ + diff --git a/VAB-WebArena-Lite/environment_docker/webarena-homepage/templates/scratchpad.html b/VAB-WebArena-Lite/environment_docker/webarena-homepage/templates/scratchpad.html new file mode 100644 index 0000000..bd939d5 --- /dev/null +++ b/VAB-WebArena-Lite/environment_docker/webarena-homepage/templates/scratchpad.html @@ -0,0 +1,122 @@ + + + + + Note Taking App + + + +
+

My Notes

+
+ +
+
+
+ + +
+
+ +

History

+ +
+ +
+
+ + + + diff --git a/VAB-WebArena-Lite/evaluation_harness/__init__.py b/VAB-WebArena-Lite/evaluation_harness/__init__.py new file mode 100644 index 0000000..fd0b27d --- /dev/null +++ b/VAB-WebArena-Lite/evaluation_harness/__init__.py @@ -0,0 +1,19 @@ +from .evaluators import * +from .helper_functions import ( + get_query_text, + get_query_text_lowercase, + reddit_get_latest_comment_content_by_username, + reddit_get_latest_comment_obj_by_username, + reddit_get_parent_comment_username_of_latest_comment_by_username, + shopping_get_latest_order_url, + shopping_get_num_reviews, + shopping_get_order_product_name_list, + shopping_get_order_product_option, + shopping_get_order_product_quantity, + shopping_get_product_attributes, + shopping_get_product_price, + shopping_get_rating_as_percentage, + shopping_get_sku_latest_review_author, + shopping_get_sku_latest_review_rating, + shopping_get_sku_latest_review_text, +) diff --git a/VAB-WebArena-Lite/new/evaluators.py b/VAB-WebArena-Lite/evaluation_harness/evaluators.py similarity index 100% rename from VAB-WebArena-Lite/new/evaluators.py rename to VAB-WebArena-Lite/evaluation_harness/evaluators.py diff --git a/VAB-WebArena-Lite/new/helper_functions_eval.py b/VAB-WebArena-Lite/evaluation_harness/helper_functions.py similarity index 98% rename from VAB-WebArena-Lite/new/helper_functions_eval.py rename to VAB-WebArena-Lite/evaluation_harness/helper_functions.py index f98b24d..2f53baa 100644 --- a/VAB-WebArena-Lite/new/helper_functions_eval.py +++ b/VAB-WebArena-Lite/evaluation_harness/helper_functions.py @@ -1,6 +1,7 @@ """Implements helper functions to assist evaluation cases where other evaluators are not suitable.""" import json from datetime import datetime, timezone +import os from typing import Any, Union from urllib.parse import urlparse @@ -597,13 +598,17 @@ def llm_fuzzy_match(pred: str, reference: str, question: str) -> float: logger.info(f'[R] {reference}') logger.info(f'[P] {pred}') + + response = 
generate_from_openai_chat_completion( - model="gpt-4-1106-preview", + model="gpt-4o", messages=messages, temperature=0, max_tokens=768, top_p=1.0, context_length=0, + api_key=os.environ["OPENAI_API_KEY_FUZZY"], + base_url=os.environ["OPENAI_API_URL_FUZZY"], ).lower() if "partially correct" in response or "incorrect" in response: return 0.0 @@ -633,12 +638,14 @@ def llm_ua_match(pred: str, reference: str, question: str) -> float: ] response = generate_from_openai_chat_completion( - model="gpt-4-1106-preview", + model="gpt-4o", messages=messages, temperature=0, max_tokens=768, top_p=1.0, context_length=0, + api_key=os.environ["OPENAI_API_KEY_FUZZY"], + base_url=os.environ["OPENAI_API_URL_FUZZY"], ).lower() if "different" in response: return 0.0 diff --git a/VAB-WebArena-Lite/evaluation_harness/image_utils.py b/VAB-WebArena-Lite/evaluation_harness/image_utils.py new file mode 100644 index 0000000..da6782e --- /dev/null +++ b/VAB-WebArena-Lite/evaluation_harness/image_utils.py @@ -0,0 +1,84 @@ +from typing import List + +import numpy as np +from PIL import Image +from skimage.metrics import structural_similarity as ssim +from transformers import ( + Blip2ForConditionalGeneration, + Blip2Processor, +) + + +def get_captioning_fn( + device, dtype, model_name: str = "Salesforce/blip2-flan-t5-xl" +) -> callable: + if "blip2" in model_name: + captioning_processor = Blip2Processor.from_pretrained(model_name) + captioning_model = Blip2ForConditionalGeneration.from_pretrained( + model_name, torch_dtype=dtype + ) + else: + raise NotImplementedError( + "Only BLIP-2 models are currently supported" + ) + captioning_model.to(device) + + def caption_images( + images: List[Image.Image], + prompt: List[str] = None, + max_new_tokens: int = 32, + ) -> List[str]: + if prompt is None: + # No prompt supplied: generate captions from the images alone + inputs = captioning_processor( + images=images, return_tensors="pt" + ).to(device, dtype) + generated_ids = captioning_model.generate( + **inputs, max_new_tokens=max_new_tokens + ) + 
captions = captioning_processor.batch_decode( + generated_ids, skip_special_tokens=True + ) + else: + # Prompted generation (e.g. VQA). Prompt is a list of strings, one for each image + assert len(images) == len( + prompt + ), "Number of images and prompts must match, got {} and {}".format( + len(images), len(prompt) + ) + inputs = captioning_processor( + images=images, text=prompt, return_tensors="pt" + ).to(device, dtype) + generated_ids = captioning_model.generate( + **inputs, max_new_tokens=max_new_tokens + ) + captions = captioning_processor.batch_decode( + generated_ids, skip_special_tokens=True + ) + + return captions + + return caption_images + + +def get_image_ssim(imageA, imageB): + # Determine the size to which we should resize + new_size = max(imageA.size[0], imageB.size[0]), max( + imageA.size[1], imageB.size[1] + ) + + # Resize images + imageA = imageA.resize(new_size, Image.LANCZOS) + imageB = imageB.resize(new_size, Image.LANCZOS) + + # Convert images to grayscale + grayA = imageA.convert("L") + grayB = imageB.convert("L") + + # Convert grayscale images to numpy arrays for SSIM computation + grayA = np.array(grayA) + grayB = np.array(grayB) + + # Compute the Structural Similarity Index (SSIM) between the two images + score, _ = ssim(grayA, grayB, full=True) + return score diff --git a/VAB-WebArena-Lite/new/llms_init.py b/VAB-WebArena-Lite/llms/__init__.py similarity index 100% rename from VAB-WebArena-Lite/new/llms_init.py rename to VAB-WebArena-Lite/llms/__init__.py diff --git a/VAB-WebArena-Lite/new/lm_config.py b/VAB-WebArena-Lite/llms/lm_config.py similarity index 100% rename from VAB-WebArena-Lite/new/lm_config.py rename to VAB-WebArena-Lite/llms/lm_config.py diff --git a/VAB-WebArena-Lite/new/api_utils.py b/VAB-WebArena-Lite/llms/providers/api_utils.py similarity index 100% rename from VAB-WebArena-Lite/new/api_utils.py rename to VAB-WebArena-Lite/llms/providers/api_utils.py diff --git a/VAB-WebArena-Lite/llms/providers/gemini_utils.py 
b/VAB-WebArena-Lite/llms/providers/gemini_utils.py new file mode 100644 index 0000000..f1c3e76 --- /dev/null +++ b/VAB-WebArena-Lite/llms/providers/gemini_utils.py @@ -0,0 +1,105 @@ +"""Tools to generate from Gemini prompts.""" + +import random +import time +from typing import Any + +from google.api_core.exceptions import InvalidArgument +from vertexai.preview.generative_models import ( + GenerativeModel, + HarmBlockThreshold, + HarmCategory, + Image, +) + +model = GenerativeModel("gemini-pro-vision") + + +def retry_with_exponential_backoff( # type: ignore + func, + initial_delay: float = 1, + exponential_base: float = 1, + jitter: bool = True, + max_retries: int = 10, + errors: tuple[Any] = (InvalidArgument,), +): + """Retry a function with exponential backoff.""" + + def wrapper(*args, **kwargs): # type: ignore + # Initialize variables + num_retries = 0 + delay = initial_delay + + # Loop until a successful response or max_retries is hit or an exception is raised + while True: + try: + + return func(*args, **kwargs) + + # Retry on specified errors + except errors as e: + # Increment retries + num_retries += 1 + + # Check if max retries has been reached + if num_retries > max_retries: + raise Exception( + f"Maximum number of retries ({max_retries}) exceeded." 
+ ) + + # Increment the delay + delay *= exponential_base * (1 + jitter * random.random()) + + # Sleep for the delay + time.sleep(delay) + + # Raise exceptions for any errors not specified + except Exception as e: + raise e + + return wrapper + + +@retry_with_exponential_backoff +def generate_from_gemini_completion( + prompt: list[str | Image], + engine: str, + temperature: float, + max_tokens: int, + top_p: float, +) -> str: + del engine + safety_config = { + HarmCategory.HARM_CATEGORY_UNSPECIFIED: HarmBlockThreshold.BLOCK_ONLY_HIGH, + HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH, + HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH, + HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH, + HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH, + } + response = model.generate_content( + prompt, + generation_config=dict( + candidate_count=1, + max_output_tokens=max_tokens, + top_p=top_p, + temperature=temperature, + ), + safety_settings=safety_config, + ) + answer = response.text + return answer + + +@retry_with_exponential_backoff +# debug only +def fake_generate_from_gemini_chat_completion( + messages: list[dict[str, str]], + model: str, + temperature: float, + max_tokens: int, + top_p: float, + context_length: int, + stop_token: str | None = None, +) -> str: + answer = "Let's think step-by-step. This page shows a list of links and buttons. There is a search box with the label 'Search query'. I will click on the search box to type the query. So the action I will perform is \"click [60]\"." 
+ return answer diff --git a/VAB-WebArena-Lite/llms/providers/hf_utils.py b/VAB-WebArena-Lite/llms/providers/hf_utils.py new file mode 100644 index 0000000..b5e8987 --- /dev/null +++ b/VAB-WebArena-Lite/llms/providers/hf_utils.py @@ -0,0 +1,21 @@ +from text_generation import Client # type: ignore + + +def generate_from_huggingface_completion( + prompt: str, + model_endpoint: str, + temperature: float, + top_p: float, + max_new_tokens: int, + stop_sequences: list[str] | None = None, +) -> str: + client = Client(model_endpoint, timeout=60) + generation: str = client.generate( + prompt=prompt, + temperature=temperature, + top_p=top_p, + max_new_tokens=max_new_tokens, + stop_sequences=stop_sequences, + ).generated_text + + return generation diff --git a/VAB-WebArena-Lite/new/openai_utils.py b/VAB-WebArena-Lite/llms/providers/openai_utils.py similarity index 94% rename from VAB-WebArena-Lite/new/openai_utils.py rename to VAB-WebArena-Lite/llms/providers/openai_utils.py index c9754f2..a2e5f6e 100644 --- a/VAB-WebArena-Lite/new/openai_utils.py +++ b/VAB-WebArena-Lite/llms/providers/openai_utils.py @@ -12,9 +12,8 @@ import aiolimiter import openai from openai import AsyncOpenAI, OpenAI -base_url = os.environ.get("OPENAI_API_URL") -client = OpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url=base_url) -aclient = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url=base_url) + +aclient = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url=os.environ["OPENAI_API_URL"]) from tqdm.asyncio import tqdm_asyncio @@ -147,7 +146,7 @@ def generate_from_openai_completion( top_p: float, stop_token: str | None = None, api_key: str | None = None, - base_url: str | None = None + base_url: str | None = None, ) -> str: if "OPENAI_API_KEY" not in os.environ and api_key is None: raise ValueError( @@ -155,6 +154,10 @@ def generate_from_openai_completion( ) if api_key is not None: client = OpenAI(api_key=api_key, base_url=base_url) + else: + base_url = 
os.environ.get("OPENAI_API_URL") + client = OpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url=base_url) + response = client.completions.create( prompt=prompt, model=model, @@ -254,11 +257,18 @@ def generate_from_openai_chat_completion( top_p: float, context_length: int, stop_token: str | None = None, + api_key: str | None = None, + base_url: str | None = None, ) -> str: if "OPENAI_API_KEY" not in os.environ: raise ValueError( "OPENAI_API_KEY environment variable must be set when using OpenAI API." ) + if api_key is not None: + client = OpenAI(api_key=api_key, base_url=base_url) + else: + base_url = os.environ.get("OPENAI_API_URL") + client = OpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url=base_url) response = client.chat.completions.create( model=model, messages=messages, diff --git a/VAB-WebArena-Lite/new/tokenizers.py b/VAB-WebArena-Lite/llms/tokenizers.py similarity index 100% rename from VAB-WebArena-Lite/new/tokenizers.py rename to VAB-WebArena-Lite/llms/tokenizers.py diff --git a/VAB-WebArena-Lite/new/utils.py b/VAB-WebArena-Lite/llms/utils.py similarity index 100% rename from VAB-WebArena-Lite/new/utils.py rename to VAB-WebArena-Lite/llms/utils.py diff --git a/VAB-WebArena-Lite/media/SourceCodePro-SemiBold.ttf b/VAB-WebArena-Lite/media/SourceCodePro-SemiBold.ttf new file mode 100644 index 0000000..834b4d6 Binary files /dev/null and b/VAB-WebArena-Lite/media/SourceCodePro-SemiBold.ttf differ diff --git a/VAB-WebArena-Lite/media/find_restaurant.gif b/VAB-WebArena-Lite/media/find_restaurant.gif new file mode 100644 index 0000000..bd06dc4 Binary files /dev/null and b/VAB-WebArena-Lite/media/find_restaurant.gif differ diff --git a/VAB-WebArena-Lite/media/homepage_demo.png b/VAB-WebArena-Lite/media/homepage_demo.png new file mode 100644 index 0000000..05a4800 Binary files /dev/null and b/VAB-WebArena-Lite/media/homepage_demo.png differ diff --git a/VAB-WebArena-Lite/media/overview.png b/VAB-WebArena-Lite/media/overview.png new file mode 100644 index 
0000000..35b61ae Binary files /dev/null and b/VAB-WebArena-Lite/media/overview.png differ diff --git a/VAB-WebArena-Lite/media/som_figure.png b/VAB-WebArena-Lite/media/som_figure.png new file mode 100644 index 0000000..9e1f76b Binary files /dev/null and b/VAB-WebArena-Lite/media/som_figure.png differ diff --git a/VAB-WebArena-Lite/prepare.sh b/VAB-WebArena-Lite/prepare.sh new file mode 100644 index 0000000..09885ad --- /dev/null +++ b/VAB-WebArena-Lite/prepare.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# re-validate login information +mkdir -p ./.auth +python browser_env/auto_login.py \ No newline at end of file diff --git a/VAB-WebArena-Lite/prepare_run.sh b/VAB-WebArena-Lite/prepare_run.sh new file mode 100644 index 0000000..b6d3c3b --- /dev/null +++ b/VAB-WebArena-Lite/prepare_run.sh @@ -0,0 +1,24 @@ +#!/bin/bash +DATASET='webarena' # TODO: select from ['webarena', 'visualwebarena'] +result_dir='' # TODO: set your result_dir +provider='openai' # TODO: select from ['openai', 'finetune', ...] +model='' # TODO: assign model name, which is used for action generation +planner_ip='' # TODO: ip address of the model you are deploying (only if you are deploying your own model using e.g. 
vllm) +instruction_path='agent/prompts/jsons/p_webrl_chat.json' # e.g., agent/prompts/jsons/p_cot_id_actree_2s.json +test_config_base_dir='config_files/wa/test_webarena_lite' # e.g., config_files/wa/test_webarena_lite +temperature=0.0 + +SERVER='localhost' # TODO: your server address +MAP_SERVER='https://www.openstreetmap.org' # TODO: the server address for MAP tasks +OPENAI_API_KEY='none' # TODO: if you test OpenAI APIs +OPENAI_API_URL='http://192.168.16.116:18080/v1' # TODO: if you test OpenAI APIs +OPENAI_ORGANIZATION='' +CONDA_ENV_NAME='vab' # TODO: the name of your conda environment for testing WebArena + +export DATASET=${DATASET}; export SHOPPING="http://${SERVER}:28082";export SHOPPING_ADMIN="http://${SERVER}:28083/admin";export REDDIT="http://${SERVER}:28080";export GITLAB="http://${SERVER}:28084";export MAP="${MAP_SERVER}";export WIKIPEDIA="http://${SERVER}:28081/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing";export HOMEPAGE="http://${SERVER}:20080";export OPENAI_API_KEY=${OPENAI_API_KEY};export OPENAI_API_URL=${OPENAI_API_URL};export OPENAI_ORGANIZATION=${OPENAI_ORGANIZATION} + +python scripts/generate_test_data.py + +mkdir -p ./.auth +python browser_env/auto_login.py + diff --git a/VAB-WebArena-Lite/pytest.sh b/VAB-WebArena-Lite/pytest.sh new file mode 100644 index 0000000..947bdfe --- /dev/null +++ b/VAB-WebArena-Lite/pytest.sh @@ -0,0 +1,82 @@ +#!/bin/bash +DATASET='webarena' # TODO: select from ['webarena', 'visualwebarena'] +result_dir='' # TODO: set your result_dir +provider='' # TODO: select from ['openai', 'finetune', ...] +model='' # TODO: assign model name. 
If `provider == finetune`, choose `finetuned` +instruction_path='agent/prompts/jsons/p_som_cot_id_actree_3s.json' # e.g., agent/prompts/jsons/p_cot_id_actree_2s.json +test_config_base_dir='config_files/wa/test_webarena_lite' # e.g., config_files/wa/test_webarena_lite +temperature=0.0 + +SERVER='localhost' # TODO: your server address +MAP_SERVER='www.openstreetmap.org' # TODO: the server address for MAP tasks +OPENAI_API_KEY='' # TODO: if you test OpenAI APIs +OPENAI_ORGANIZATION='' +CONDA_ENV_NAME='' # TODO: the name of your conda environment for testing WebArena + +export DATASET=${DATASET}; export SHOPPING="http://${SERVER}:28082";export SHOPPING_ADMIN="http://${SERVER}:28083/admin";export REDDIT="http://${SERVER}:28080";export GITLAB="http://${SERVER}:28084";export MAP='https://www.openstreetmap.org';export WIKIPEDIA="http://${SERVER}:28081/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing";export HOMEPAGE="http://${SERVER}:20080";export OPENAI_API_KEY=${OPENAI_API_KEY};export OPENAI_ORGANIZATION=${OPENAI_ORGANIZATION} + +# pytest -x +pytest tests/test_browser_env/test_action_functionalities.py + + +# get the number of tmux panes +# num_panes=$(tmux list-panes | wc -l) + +# # calculate how many panes need to be created +# let "panes_to_create = 8 - num_panes" + +# # array of tmux commands to create each pane +# tmux_commands=( +# 'tmux split-window -h' +# 'tmux split-window -v' +# 'tmux select-pane -t 0; tmux split-window -v' +# 'tmux select-pane -t 0; tmux split-window -v' +# 'tmux select-pane -t 2; tmux split-window -v' +# 'tmux select-pane -t 4; tmux split-window -v' +# 'tmux select-pane -t 6; tmux split-window -v' +# ) + +# # create panes up to 8 +# for ((i=0; i<$panes_to_create; i++)); do +# eval ${tmux_commands[$i]} +# done + +# #!/bin/bash + +# # Function to run a job +# run_job() { +# tmux select-pane -t $1 +# tmux send-keys "tmux set mouse on; conda activate ${CONDA_ENV_NAME}; ${ENV_VARIABLES}; until python run.py --viewport_width 1280 
--viewport_height 720 --test_start_idx $2 --test_end_idx $3 --provider ${provider} --model ${model} --instruction_path ${instruction_path} --temperature ${temperature} --test_config_base_dir ${test_config_base_dir} --result_dir ${result_dir} --action_set_tag som --observation_type image_som; do echo 'crashed' >&2; sleep 1; done" C-m +# sleep 3 +# } + +# TOLERANCE=2 +# run_batch() { +# args=("$@") # save all arguments in an array +# num_jobs=${#args[@]} # get number of arguments + +# for ((i=1; i<$num_jobs; i++)); do +# run_job $i ${args[i-1]} ${args[i]} +# done + +# # Wait for all jobs to finish +# while tmux list-panes -F "#{pane_pid} #{pane_current_command}" | grep -q python; do +# sleep 100 # wait for 10 seconds before checking again +# done + +# # Run checker +# while ! python scripts/check_error_runs.py ${result_dir} --delete_errors --tolerance ${TOLERANCE}; do +# echo "Check failed, rerunning jobs..." +# for ((i=1; i<$num_jobs; i++)); do +# run_job $i ${args[i-1]} ${args[i]} +# done + +# # Wait for all jobs to finish +# while tmux list-panes -F "#{pane_pid} #{pane_current_command}" | grep -q python; do +# sleep 100 # wait for 10 seconds before checking again +# done +# done + +# } + +# run_batch 0 24 48 72 96 120 143 165 diff --git a/VAB-WebArena-Lite/requirements.txt b/VAB-WebArena-Lite/requirements.txt new file mode 100644 index 0000000..def1619 --- /dev/null +++ b/VAB-WebArena-Lite/requirements.txt @@ -0,0 +1,146 @@ +accelerate==0.22.0 +aiohttp==3.8.5 +aiolimiter==1.1.0 +aiosignal==1.3.1 +annotated-types==0.5.0 +anyio==3.7.1 +appnope==0.1.3 +asttokens==2.4.0 +async-timeout==4.0.3 +attrs==23.1.0 +backcall==0.2.0 +beartype==0.12.0 +beautifulsoup4==4.12.2 +certifi==2023.7.22 +cfgv==3.4.0 +charset-normalizer==3.2.0 +click==8.1.7 +cloudpickle==2.2.1 +comm==0.1.4 +contourpy==1.1.1 +cycler==0.12.1 +datasets==2.14.4 +debugpy==1.8.0 +decorator==5.1.1 +dill==0.3.7 +distlib==0.3.7 +evaluate==0.4.0 +exceptiongroup==1.1.3 +execnet==2.0.2 +executing==2.0.0 
+Farama-Notifications==0.0.4 +fastjsonschema==2.18.1 +filelock==3.12.2 +fonttools==4.43.1 +frozenlist==1.4.0 +fsspec==2023.6.0 +google-api-core==2.15.0 +google-auth==2.26.1 +google-cloud-aiplatform==1.38.1 +google-cloud-bigquery==3.14.1 +google-cloud-core==2.4.1 +google-cloud-resource-manager==1.11.0 +google-cloud-storage==2.14.0 +google-crc32c==1.5.0 +google-resumable-media==2.7.0 +googleapis-common-protos==1.62.0 +gradio_client==0.5.2 +greenlet==2.0.2 +grpc-google-iam-v1==0.13.0 +gymnasium==0.29.1 +h11==0.14.0 +httpcore==0.18.0 +httpx==0.25.0 +huggingface-hub==0.16.4 +identify==2.5.30 +idna==3.4 +iniconfig==2.0.0 +ipykernel==6.25.2 +ipython==8.16.1 +jedi==0.19.1 +Jinja2==3.1.2 +joblib==1.3.2 +jsonschema==4.19.1 +jsonschema-specifications==2023.7.1 +jupyter_client==8.4.0 +jupyter_core==5.4.0 +kiwisolver==1.4.5 +MarkupSafe==2.1.3 +matplotlib==3.8.0 +matplotlib-inline==0.1.6 +mpmath==1.3.0 +multidict==6.0.4 +multiprocess==0.70.15 +mypy==0.991 +mypy-extensions==1.0.0 +nbclient==0.6.8 +nbformat==5.9.2 +nbmake==1.4.6 +nest-asyncio==1.5.8 +networkx==3.1 +nltk==3.8.1 +nodeenv==1.8.0 +numpy==1.25.2 +openai==1.3.5 +opencv-python==4.8.1.78 +packaging==23.1 +pandas==2.0.3 +parso==0.8.3 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==10.0.1 +platformdirs==3.11.0 +playwright==1.37.0 +pluggy==1.3.0 +pre-commit==3.0.1 +prompt-toolkit==3.0.39 +protobuf==4.24.3 +psutil==5.9.5 +ptyprocess==0.7.0 +pure-eval==0.2.2 +py==1.11.0 +pyarrow==12.0.1 +pydantic==2.4.2 +pydantic_core==2.10.1 +pyee==9.0.4 +Pygments==2.16.1 +pyparsing==3.1.1 +pytest==7.1.2 +pytest-asyncio==0.21.1 +pytest-xdist==3.3.1 +python-dateutil==2.8.2 +pytz==2023.3 +PyYAML==6.0.1 +pyzmq==25.1.1 +referencing==0.30.2 +regex==2023.8.8 +requests==2.31.0 +responses==0.18.0 +rpds-py==0.10.6 +safetensors==0.3.3 +scikit-image==0.22.0 +sentencepiece==0.1.99 +six==1.16.0 +sniffio==1.3.0 +soupsieve==2.5 +stack-data==0.6.3 +sympy==1.12 +text-generation==0.6.1 +tiktoken==0.4.0 +tokenizers==0.14.0 +tomli==2.0.1 +torch==2.0.1 
+tornado==6.3.3 +tqdm==4.66.1 +traitlets==5.11.2 +transformers==4.34.0 +types-requests==2.31.0.10 +types-tqdm==4.66.0.1 +typing_extensions==4.7.1 +tzdata==2023.3 +urllib3==2.0.4 +virtualenv==20.24.5 +wcwidth==0.2.8 +websockets==11.0.3 +xxhash==3.3.0 +yarl==1.9.2 diff --git a/VAB-WebArena-Lite/new/run.py b/VAB-WebArena-Lite/run.py similarity index 99% rename from VAB-WebArena-Lite/new/run.py rename to VAB-WebArena-Lite/run.py index 9da1b7e..f56142a 100644 --- a/VAB-WebArena-Lite/new/run.py +++ b/VAB-WebArena-Lite/run.py @@ -188,6 +188,7 @@ def config() -> argparse.Namespace: parser.add_argument("--context_length", type=int, default=0) parser.add_argument("--max_tokens", type=int, default=384) parser.add_argument("--stop_token", type=str, default=None) + parser.add_argument("--proxy_url", type=str, default=None) parser.add_argument( "--max_retry", type=int, @@ -371,6 +372,7 @@ def test( # NOTE: captioning_fn here is used for LLM + captioning baselines. # This can be different from the captioning model used for evals. captioning_fn=caption_image_fn, + proxy_url=args.proxy_url, ) for config_file in config_file_list: @@ -395,7 +397,8 @@ def test( # subprocess to renew the cookie subprocess.run( [ - "python", + # "python", + "conda", "run", "-n", "vab", "python", "browser_env/auto_login.py", "--auth_folder", temp_dir, diff --git a/VAB-WebArena-Lite/run_demo.py b/VAB-WebArena-Lite/run_demo.py new file mode 100644 index 0000000..4c0ea95 --- /dev/null +++ b/VAB-WebArena-Lite/run_demo.py @@ -0,0 +1,456 @@ +"""Script to run end-to-end evaluation on the benchmark. + +Modified from https://github.com/web-arena-x/webarena/blob/main/run.py. 
+""" +import argparse +import json +import logging +import os +import random +import time +import tempfile +from pathlib import Path + +import openai +import requests +import torch +from beartype import beartype +from PIL import Image + +from agent import ( + PromptAgent, + construct_agent, +) +from agent.prompts import * +from browser_env import ( + Action, + ActionTypes, + ScriptBrowserEnv, + StateInfo, + Trajectory, + create_stop_action, +) +from browser_env.actions import is_equivalent +from browser_env.helper_functions import ( + RenderHelper, + get_action_description, +) +from evaluation_harness import image_utils + +LOG_FOLDER = "log_files" +Path(LOG_FOLDER).mkdir(parents=True, exist_ok=True) +LOG_FILE_NAME = f"{LOG_FOLDER}/log_{time.strftime('%Y%m%d%H%M%S', time.localtime())}_{random.randint(0, 10000)}.log" + +logger = logging.getLogger("logger") +logger.setLevel(logging.INFO) + +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.DEBUG) +logger.addHandler(console_handler) + +file_handler = logging.FileHandler(LOG_FILE_NAME) +file_handler.setLevel(logging.DEBUG) +logger.addHandler(file_handler) + +# Set the log format +formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") +console_handler.setFormatter(formatter) +file_handler.setFormatter(formatter) + + +def config() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Run end-to-end evaluation on the benchmark" + ) + parser.add_argument( + "--render", action="store_true", help="Render the browser" + ) + + parser.add_argument( + "--slow_mo", + type=int, + default=0, + help="Slow down the browser by the specified amount", + ) + parser.add_argument( + "--action_set_tag", default="som", help="Action type" + ) + parser.add_argument( + "--observation_type", + choices=[ + "accessibility_tree", + "accessibility_tree_with_captioner", + "html", + "image", + "image_som", + ], + default="image_som", + help="Observation type", + ) + parser.add_argument( 
+ "--current_viewport_only", + action="store_true", + help="Only use the current viewport for the observation", + ) + parser.add_argument("--viewport_width", type=int, default=1280) + parser.add_argument("--viewport_height", type=int, default=2048) + parser.add_argument("--save_trace_enabled", action="store_true") + parser.add_argument("--sleep_after_execution", type=float, default=0.0) + + parser.add_argument("--max_steps", type=int, default=30) + + # agent config + parser.add_argument("--agent_type", type=str, default="prompt") + parser.add_argument( + "--instruction_path", + type=str, + default="agent/prompts/jsons/p_som_cot_id_actree_3s.json", + ) + parser.add_argument( + "--parsing_failure_th", + help="When consecutive parsing failures exceed this threshold, the agent will terminate early.", + type=int, + default=3, + ) + parser.add_argument( + "--repeating_action_failure_th", + help="When consecutive repeated actions exceed this threshold, the agent will terminate early.", + type=int, + default=5, + ) + + parser.add_argument( + "--eval_captioning_model_device", + type=str, + default="cpu", + choices=["cpu", "cuda"], + help="Device to run eval captioning model on. 
By default, runs it on CPU.", + ) + parser.add_argument( + "--eval_captioning_model", + type=str, + default="Salesforce/blip2-flan-t5-xl", + choices=["Salesforce/blip2-flan-t5-xl"], + help="Captioning backbone for VQA-type evals.", + ) + parser.add_argument( + "--captioning_model", + type=str, + default="Salesforce/blip2-flan-t5-xl", + choices=["Salesforce/blip2-flan-t5-xl", "llava-hf/llava-1.5-7b-hf"], + help="Captioning backbone for accessibility tree alt text.", + ) + + # lm config + parser.add_argument("--provider", type=str, default="openai") + parser.add_argument("--model", type=str, default="gpt-4-vision-preview") + parser.add_argument("--mode", type=str, default="chat") + parser.add_argument("--temperature", type=float, default=1.0) + parser.add_argument("--top_p", type=float, default=0.9) + parser.add_argument("--context_length", type=int, default=0) + parser.add_argument("--max_tokens", type=int, default=384) + parser.add_argument("--stop_token", type=str, default=None) + parser.add_argument( + "--max_retry", + type=int, + help="max retry times to perform generations when parsing fails", + default=1, + ) + parser.add_argument( + "--max_obs_length", + type=int, + help="when not zero, will truncate the observation to this length before feeding to the model", + default=3840, + ) + + + # example config + parser.add_argument("--start_url", type=str, default="https://google.com") + parser.add_argument("--intent", type=str, required=True) + parser.add_argument("--image", type=str, default="", help="url of images, seperated by |AND|") + + # logging related + parser.add_argument("--result_dir", type=str, default="") + args = parser.parse_args() + + # check the whether the action space is compatible with the observation space + if ( + args.action_set_tag == "id_accessibility_tree" + and args.observation_type + not in [ + "accessibility_tree", + "accessibility_tree_with_captioner", + "image_som", + ] + ): + raise ValueError( + f"Action type {args.action_set_tag} is 
incompatible with the observation type {args.observation_type}" + ) + + return args + + +@beartype +def early_stop( + trajectory: Trajectory, max_steps: int, thresholds: dict[str, int] +) -> tuple[bool, str]: + """Check whether need to stop early""" + + # reach the max step + num_steps = (len(trajectory) - 1) / 2 + if num_steps >= max_steps: + return True, f"Reach max steps {max_steps}" + + last_k_actions: list[Action] + action_seq: list[Action] + + # Case: parsing failure for k times + k = thresholds["parsing_failure"] + last_k_actions = trajectory[1::2][-k:] # type: ignore[assignment] + if len(last_k_actions) >= k: + if all( + [ + action["action_type"] == ActionTypes.NONE + for action in last_k_actions + ] + ): + return True, f"Failed to parse actions for {k} times" + + # Case: same action for k times + k = thresholds["repeating_action"] + last_k_actions = trajectory[1::2][-k:] # type: ignore[assignment] + action_seq = trajectory[1::2] # type: ignore[assignment] + + if len(action_seq) == 0: + return False, "" + + last_action: Action = action_seq[-1] + + if last_action["action_type"] != ActionTypes.TYPE: + if len(last_k_actions) >= k: + if all( + [ + is_equivalent(action, last_action) + for action in last_k_actions + ] + ): + return True, f"Same action for {k} times" + + else: + # check the action sequence + if ( + sum([is_equivalent(action, last_action) for action in action_seq]) + >= k + ): + return True, f"Same typing action for {k} times" + + return False, "" + + +@beartype +def test( + args: argparse.Namespace, + config_file: str +) -> None: + scores = [] + max_steps = args.max_steps + + early_stop_thresholds = { + "parsing_failure": args.parsing_failure_th, + "repeating_action": args.repeating_action_failure_th, + } + + caption_image_fn = None # Don't use captioning for the demo, due to extra resources required to run BLIP-2. 
+ + + agent = construct_agent( + args, + captioning_fn=caption_image_fn + if args.observation_type == "accessibility_tree_with_captioner" + else None, + ) # NOTE: captioning_fn here is used for captioning input images. + + assert args.render, "Rendering is required for end-to-end evaluation" + + env = ScriptBrowserEnv( + headless=not args.render, + slow_mo=args.slow_mo, + observation_type=args.observation_type, + current_viewport_only=args.current_viewport_only, + viewport_size={ + "width": args.viewport_width, + "height": args.viewport_height, + }, + save_trace_enabled=args.save_trace_enabled, + sleep_after_execution=args.sleep_after_execution, + # NOTE: captioning_fn here is used for LLM + captioning baselines. + # This can be different from the captioning model used for evals. + captioning_fn=caption_image_fn, + ) + + try: + render_helper = RenderHelper( + config_file, args.result_dir, args.action_set_tag + ) + + # Load task. + with open(config_file, 'r') as f: + _c = json.load(f) + intent = _c["intent"] + image_paths = _c.get("image", None) + images = [] + + # Load input images for the task, if any. + if image_paths is not None: + if isinstance(image_paths, str): + image_paths = [image_paths] + for image_path in image_paths: + # Load image either from the web or from a local path. 
+ if image_path.startswith("http"): + input_image = Image.open(requests.get(image_path, stream=True).raw) + else: + input_image = Image.open(image_path) + + images.append(input_image) + + logger.info(f"[Config file]: {config_file}") + logger.info(f"[Intent]: {intent}") + + agent.reset(config_file) + trajectory: Trajectory = [] + obs, info = env.reset(options={"config_file": config_file}) + state_info: StateInfo = {"observation": obs, "info": info} + trajectory.append(state_info) + + meta_data = {"action_history": ["None"]} + while True: + early_stop_flag, stop_info = early_stop( + trajectory, max_steps, early_stop_thresholds + ) + + if early_stop_flag: + action = create_stop_action(f"Early stop: {stop_info}") + else: + try: + print('=' * 30) + print('Agent: Thinking...') + action = agent.next_action( + trajectory, + intent, + images=images, + meta_data=meta_data, + output_response=True + ) + except ValueError as e: + # get the error message + action = create_stop_action(f"ERROR: {str(e)}") + + trajectory.append(action) + + action_str = get_action_description( + action, + state_info["info"]["observation_metadata"], + action_set_tag=args.action_set_tag, + prompt_constructor=agent.prompt_constructor + if isinstance(agent, PromptAgent) + else None, + ) + render_helper.render( + action, state_info, meta_data, args.render_screenshot + ) + meta_data["action_history"].append(action_str) + + if action["action_type"] == ActionTypes.STOP: + break + + obs, _, terminated, _, info = env.step(action) + state_info = {"observation": obs, "info": info} + trajectory.append(state_info) + + if terminated: + # add a action place holder + trajectory.append(create_stop_action("")) + break + + if args.save_trace_enabled: + env.save_trace( + Path(args.result_dir) / "trace.zip" + ) + except openai.OpenAIError as e: + logger.info(f"[OpenAI Error] {repr(e)}") + except Exception as e: + logger.info(f"[Unhandled Error] {repr(e)}]") + import traceback + + # write to error file + with 
open(Path(args.result_dir) / "error.txt", "a") as f: + f.write(f"[Config file]: {config_file}\n") + f.write(f"[Unhandled Error] {repr(e)}\n") + f.write(traceback.format_exc()) # write stack trace to file + + render_helper.close() + + env.close() + + +def prepare(args: argparse.Namespace) -> None: + # convert prompt python files to json + from agent.prompts import to_json + + to_json.run() + + # prepare result dir + result_dir = args.result_dir + if not result_dir: + result_dir = ( + f"cache/results_{time.strftime('%Y%m%d%H%M%S', time.localtime())}" + ) + if not Path(result_dir).exists(): + Path(result_dir).mkdir(parents=True, exist_ok=True) + args.result_dir = result_dir + logger.info(f"Create result dir: {result_dir}") + + if not (Path(result_dir) / "traces").exists(): + (Path(result_dir) / "traces").mkdir(parents=True) + + # log the log file + with open(os.path.join(result_dir, "log_files.txt"), "a+") as f: + f.write(f"{LOG_FILE_NAME}\n") + + +@beartype +def dump_config(args: argparse.Namespace) -> None: + config_file = Path(args.result_dir) / "config.json" + if not config_file.exists(): + with open(config_file, "w") as f: + json.dump(vars(args), f, indent=4) + logger.info(f"Dump config to {config_file}") + + +if __name__ == "__main__": + os.environ["TOKENIZERS_PARALLELISM"] = "false" + args = config() + args.sleep_after_execution = 2.5 + prepare(args) + + _, tmp_config_file = tempfile.mkstemp(text=True) + images_url = None + if args.image: + images_url = args.image.split('|AND|') + with open(tmp_config_file, 'w') as f: + json.dump({ + "task_id": 0, + "start_url": args.start_url, + "intent": args.intent, + "image": images_url + }, f) + + args.render_screenshot = True + args.save_trace_enabled = True + + args.current_viewport_only = True + dump_config(args) + + test(args, tmp_config_file) + + os.remove(tmp_config_file) diff --git a/VAB-WebArena-Lite/new/score.py b/VAB-WebArena-Lite/score.py similarity index 100% rename from VAB-WebArena-Lite/new/score.py rename to 
VAB-WebArena-Lite/score.py diff --git a/VAB-WebArena-Lite/scripts/calc_breakdown_sr.py b/VAB-WebArena-Lite/scripts/calc_breakdown_sr.py new file mode 100644 index 0000000..f8a7697 --- /dev/null +++ b/VAB-WebArena-Lite/scripts/calc_breakdown_sr.py @@ -0,0 +1,149 @@ +"""Calculate the breakdown of success rate by different brekadown""" + +import json +import os +import argparse + + +def config() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument("--log_file", type=str, help="Path to the log file") + parser.add_argument( + "--config_file", + type=str, + help="Path to the config file", + default="config_files/wa/test_webarena.json", + ) + + args = parser.parse_args() + return args + + +def calc_sr(id_to_success: dict[int, bool]) -> dict[str, float]: + sr = { + "total": len(id_to_success), + "success": sum(id_to_success.values()), + "fail": len(id_to_success) - sum(id_to_success.values()), + } + # keep 2 decimal places + sr["success_rate"] = round(sr["success"] / sr["total"], 4) * 100 + + return sr + + +def parse_result(log_file: str) -> dict[int, bool]: + id_to_success: dict[int, bool] = {} + + # parse + with open(log_file, "r") as f: + for line in f: + if "[Config file]" in line: + id = os.path.basename(line.split()[-1]).split(".")[0] + elif "[Result]" in line: + if "(FAIL)" in line: + success = False + elif "(PASS)" in line: + success = True + else: + raise ValueError(f"Unknown result: {line}") + id_to_success[int(id)] = success + + return id_to_success + + +def main(args: argparse.Namespace): + id_to_success = parse_result(args.log_file) + + overall_sr = calc_sr(id_to_success) + + with open(args.config_file, "r") as f: + config = json.load(f) + + # get achievable and unachievable tasks + non_achievable = set() + for task in config: + task_id = int(task["task_id"]) + if ( + task["eval"]["eval_types"] == ["string_match"] + and task["eval"]["reference_answers"].get("fuzzy_match", "") == "N/A" + ): + non_achievable.add(task_id) + + 
achievable_sr = calc_sr( + {k: v for k, v in id_to_success.items() if k not in non_achievable} + ) + unachievable_sr = calc_sr( + {k: v for k, v in id_to_success.items() if k in non_achievable} + ) + + # get sr per website + website_to_ids = { + website: set() + for website in [ + "shopping", + "shopping_admin", + "gitlab", + "reddit", + "map", + "wikipedia", + ] + } + for task in config: + task_id = int(task["task_id"]) + cur_sites = task["sites"] + for site in cur_sites: + website_to_ids[site].add(task_id) + + website_sr = { + website: calc_sr( + {k: v for k, v in id_to_success.items() if k in website_to_ids[website]} + ) + for website in website_to_ids + } + + # by task type + task_type_to_ids = { + "info_seeking": set(), + "site_nav": set(), + "content_config": set(), + } + for task in config: + task_id = int(task["task_id"]) + if task["eval"]["eval_types"] == ["string_match"]: + task_type_to_ids["info_seeking"].add(task_id) + elif task["eval"]["eval_types"] == ["url_match"]: + task_type_to_ids["site_nav"].add(task_id) + else: + task_type_to_ids["content_config"].add(task_id) + + task_type_sr = { + task_type: calc_sr( + {k: v for k, v in id_to_success.items() if k in task_type_to_ids[task_type]} + ) + for task_type in task_type_to_ids + } + + print("=====================================") + print("Overall SR:") + print(overall_sr) + print("=====================================") + print("Achievable SR:") + print(achievable_sr) + print("=====================================") + print("Unachievable SR:") + print(unachievable_sr) + print("=====================================") + print("Website SR:") + for website, sr in website_sr.items(): + print(website) + print(sr) + print("=====================================") + print("Task type SR:") + for task_type, sr in task_type_sr.items(): + print(task_type) + print(sr) + + +if __name__ == "__main__": + args = config() + main(args) diff --git a/VAB-WebArena-Lite/scripts/check_error_runs.py 
b/VAB-WebArena-Lite/scripts/check_error_runs.py new file mode 100644 index 0000000..0039b56 --- /dev/null +++ b/VAB-WebArena-Lite/scripts/check_error_runs.py @@ -0,0 +1,157 @@ +"""Some executions may failed. +This script checks the recordings, print the task ids. +It deletes the recordings if needed.""" +import argparse +import glob +import os +import shutil +import sys + + +def merge_logs(result_folder: str, args: argparse.Namespace) -> str: + if not os.path.exists(f"{result_folder}/log_files.txt"): + sys.exit(1) + + with open(f"{result_folder}/log_files.txt", "r") as f: + log_files = f.readlines() + + merged_results = {} + for file in log_files: + with open(file.strip(), "r") as f: + lines = f.readlines() + + cur_log: list[str] = [] + index = None + for line in lines: + if "[Config file]" in line: + if ( + cur_log + and index + and os.path.exists(f"{result_folder}/render_{index}.html") + and len(cur_log) >= 3 + ): + merged_results[index] = cur_log + # update index and log + index = line.split("/")[-1].split(".")[0] + cur_log = [line] + else: + cur_log.append(line) + + if ( + cur_log + and index + and os.path.exists(f"{result_folder}/render_{index}.html") + and len(cur_log) >= 3 + ): + + merged_results[index] = cur_log + + # sort by the key + merged_results = dict( + sorted(merged_results.items(), key=lambda x: int(x[0])) + ) + + merged_log_path = f"{result_folder}/tmp_merged_log.txt" + with open(merged_log_path, "w") as f: + for k, v in merged_results.items(): + for line in v: + f.write(line) + print(f"Number of examples: {len(merged_results)}") + + unlog_examples = [] + for i in range(812): + if ( + os.path.exists(f"{result_folder}/render_{i}.html") + and str(i) not in merged_results + ): + unlog_examples.append(i) + + print(f"Number of unlogged examples: {len(unlog_examples)}") + print(unlog_examples) + if ( + args.delete_errors + or input("Do you want to delete these examples? 
(y/n)") == "y" + ): + for idx in unlog_examples: + os.remove(f"{args.result_folder}/render_{idx}.html") + + unifinished_examples = [ + i for i in range(0, 812) if str(i) not in merged_results + ] + print(f"Number of unfinished examples: {len(unifinished_examples)}") + print(unifinished_examples) + + return merged_log_path + + +def check_unhandled_errors(args: argparse.Namespace) -> int: + log_path = merge_logs(args.result_folder, args) + with open(log_path, "r") as f: + logs = f.read() + + error_examples = [] + for line in logs.split("\n"): + if "[Config file]" in line: + example_idx = line.split("/")[-1].split(".")[0] + if "[Unhandled Error]" in line or "[OpenAI Error]" in line: + error_examples.append(int(example_idx)) + + num_errors = len(error_examples) + print(f"Number of unhandled errors: {len(error_examples)}") + print(error_examples) + if ( + args.delete_errors + or input("Do you want to delete these examples? (y/n)") == "y" + ): + for idx in error_examples: + if os.path.exists(f"{args.result_folder}/render_{idx}.html"): + os.remove(f"{args.result_folder}/render_{idx}.html") + return num_errors + + +def check_unexpected_logout(args: argparse.Namespace) -> int: + target_strings = set( + [ + "Creating an account has many benefits: check out faster", + "Welcome, please sign in", + "Username or email", + "Keep me logged in", + ] + ) + + error_examples = [] + for render_file in glob.glob(f"{args.result_folder}/render_*.html"): + with open(render_file, "r") as f: + contents = f.read() + if any([s in contents for s in target_strings]): + task_id = int( + render_file.split("/")[-1].split(".")[0].split("_")[-1] + ) + error_examples.append(task_id) + print(f"Number of unexpected logout: {len(error_examples)}") + print(error_examples) + num_errors = len(error_examples) + if ( + args.delete_errors + or input("Do you want to delete these examples? 
(y/n)") == "y" + ): + for idx in error_examples: + if os.path.exists(f"{args.result_folder}/render_{idx}.html"): + os.remove(f"{args.result_folder}/render_{idx}.html") + + return num_errors + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("result_folder", type=str) + parser.add_argument("--delete_errors", action="store_true") + parser.add_argument("--tolerance", type=int, default=0) + + args = parser.parse_args() + n1 = check_unhandled_errors(args) + n2 = check_unexpected_logout(args) + if n1 + n2 > args.tolerance: + sys.exit(1) + else: + sys.exit(0) diff --git a/VAB-WebArena-Lite/scripts/collect_obs.py b/VAB-WebArena-Lite/scripts/collect_obs.py new file mode 100644 index 0000000..49317bc --- /dev/null +++ b/VAB-WebArena-Lite/scripts/collect_obs.py @@ -0,0 +1,52 @@ +"""Simple script to quickly get the observation of a page""" + +import json +import re +import time +from typing import Dict, Optional, Tuple, Type, Union, cast + +import pytest +from playwright.sync_api import Page, expect +import browser_env +from browser_env import ( + ScriptBrowserEnv, + create_id_based_action, + create_key_press_action, + create_playwright_action, + create_scroll_action, +) +from browser_env.env_config import * + +HEADLESS = False + + +def gen_tmp_storage_state() -> None: + with open(f"scripts/tmp_storage_state.json", "w") as f: + json.dump({"storage_state": ".auth/gitlab_state.json"}, f) + + +def get_observation( + observation_type: str, current_viewport_only: bool +) -> None: + env = ScriptBrowserEnv( + observation_type=observation_type, + current_viewport_only=current_viewport_only, + headless=HEADLESS, + sleep_after_execution=0.5, + ) + env.reset(options={"config_file": f"scripts/tmp_storage_state.json"}) + s = f"""page.goto("https://russmaxdesign.github.io/exercise/")""" + action_seq = s.split("\n") + + for action in action_seq: + action = action.strip() + obs, success, _, _, info = env.step(create_playwright_action(action)) + 
print(obs["text"]) + _ = input("Press enter to continue") + + +if __name__ == "__main__": + gen_tmp_storage_state() + obs_type = "accessibility_tree" + current_viewport_only = True + get_observation(obs_type, current_viewport_only) diff --git a/VAB-WebArena-Lite/new/generate_test_data.py b/VAB-WebArena-Lite/scripts/generate_test_data.py similarity index 100% rename from VAB-WebArena-Lite/new/generate_test_data.py rename to VAB-WebArena-Lite/scripts/generate_test_data.py diff --git a/VAB-WebArena-Lite/scripts/reset_reddit.sh b/VAB-WebArena-Lite/scripts/reset_reddit.sh new file mode 100644 index 0000000..7158891 --- /dev/null +++ b/VAB-WebArena-Lite/scripts/reset_reddit.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Define variables +CONTAINER_NAME="forum" + +docker stop $CONTAINER_NAME +docker rm $(docker ps -a | grep $CONTAINER_NAME | awk '{print $1}') +docker run --name $CONTAINER_NAME -p 9999:80 -d postmill-populated-exposed-withimg +# wait ~15 secs for all services to start +sleep 15 diff --git a/VAB-WebArena-Lite/scripts/reset_shopping.sh b/VAB-WebArena-Lite/scripts/reset_shopping.sh new file mode 100644 index 0000000..a344071 --- /dev/null +++ b/VAB-WebArena-Lite/scripts/reset_shopping.sh @@ -0,0 +1,30 @@ +#!/bin/bash +### Performs a full reset of the shopping environment. +### Note: This takes a while (~2 minutes), so it's not recommended to run this too frequently. 
+ +# Define variables +CONTAINER_NAME="shopping" + +docker stop $CONTAINER_NAME +docker rm $(docker ps -a | grep $CONTAINER_NAME | awk '{print $1}') +docker run --name $CONTAINER_NAME -p 7770:80 -d shopping_final_0712 +# wait ~1 min for all services to start +sleep 60 + +docker exec $CONTAINER_NAME /var/www/magento2/bin/magento setup:store-config:set --base-url="http://localhost:7770" # no trailing slash +docker exec $CONTAINER_NAME mysql -u magentouser -pMyPassword magentodb -e 'UPDATE core_config_data SET value="http://localhost:7770/" WHERE path = "web/secure/base_url";' +docker exec $CONTAINER_NAME /var/www/magento2/bin/magento cache:flush + +docker exec $CONTAINER_NAME /var/www/magento2/bin/magento indexer:set-mode schedule catalogrule_product +docker exec $CONTAINER_NAME /var/www/magento2/bin/magento indexer:set-mode schedule catalogrule_rule +docker exec $CONTAINER_NAME /var/www/magento2/bin/magento indexer:set-mode schedule catalogsearch_fulltext +docker exec $CONTAINER_NAME /var/www/magento2/bin/magento indexer:set-mode schedule catalog_category_product +docker exec $CONTAINER_NAME /var/www/magento2/bin/magento indexer:set-mode schedule customer_grid +docker exec $CONTAINER_NAME /var/www/magento2/bin/magento indexer:set-mode schedule design_config_grid +docker exec $CONTAINER_NAME /var/www/magento2/bin/magento indexer:set-mode schedule inventory +docker exec $CONTAINER_NAME /var/www/magento2/bin/magento indexer:set-mode schedule catalog_product_category +docker exec $CONTAINER_NAME /var/www/magento2/bin/magento indexer:set-mode schedule catalog_product_attribute +docker exec $CONTAINER_NAME /var/www/magento2/bin/magento indexer:set-mode schedule catalog_product_price +docker exec $CONTAINER_NAME /var/www/magento2/bin/magento indexer:set-mode schedule cataloginventory_stock + + diff --git a/VAB-WebArena-Lite/scripts/run_classifieds_som.sh b/VAB-WebArena-Lite/scripts/run_classifieds_som.sh new file mode 100644 index 0000000..c78abf7 --- /dev/null +++ 
b/VAB-WebArena-Lite/scripts/run_classifieds_som.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +## Define the model, result directory, and instruction path variables +model="gpt-4-vision-preview" +result_dir="classifieds_gpt4_som" +instruction_path="agent/prompts/jsons/p_som_cot_id_actree_3s.json" +captioning_model="Salesforce/blip2-flan-t5-xl" + +# Define the batch size variable +batch_size=50 + +# Define the starting and ending indices +start_idx=0 +end_idx=$((start_idx + batch_size)) +max_idx=234 + +# Loop until the starting index is less than or equal to 466 +while [ $start_idx -le $max_idx ] +do + # Classifieds reset is quick, so we can do it after every example. + curl -X POST http://127.0.0.1:9980/index.php?page=reset -d "token=4b61655535e7ed388f0d40a93600254c" + bash prepare.sh + python run.py \ + --instruction_path $instruction_path \ + --test_start_idx $start_idx \ + --test_end_idx $end_idx \ + --model $model \ + --result_dir $result_dir \ + --test_config_base_dir=config_files/classifieds_visual \ + --repeating_action_failure_th 5 --viewport_height 2048 --max_obs_length 3840 \ + --captioning_model $captioning_model \ + --action_set_tag som --observation_type image_som + + # Increment the start and end indices by the batch size + start_idx=$((start_idx + batch_size)) + end_idx=$((end_idx + batch_size)) + + # Ensure the end index does not exceed 466 in the final iteration + if [ $end_idx -gt $max_idx ]; then + end_idx=$max_idx + fi +done diff --git a/VAB-WebArena-Lite/scripts/run_reddit_som.sh b/VAB-WebArena-Lite/scripts/run_reddit_som.sh new file mode 100644 index 0000000..294b4f5 --- /dev/null +++ b/VAB-WebArena-Lite/scripts/run_reddit_som.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +### Define the model, result directory, and instruction path variables +model="gpt-4-vision-preview" +result_dir="reddit_gpt4_som" +instruction_path="agent/prompts/jsons/p_som_cot_id_actree_3s.json" +captioning_model="Salesforce/blip2-flan-t5-xl" + +# Define the batch size variable +batch_size=30 
+ +# Define the starting and ending indices +start_idx=0 +end_idx=$((start_idx + batch_size)) +max_idx=210 + +# Loop until the starting index is less than or equal to 466 +while [ $start_idx -le $max_idx ] +do + # Run the scripts and the Python command with the current indices and defined variables + bash scripts/reset_reddit.sh + bash prepare.sh + python run.py \ + --instruction_path $instruction_path \ + --test_start_idx $start_idx \ + --test_end_idx $end_idx \ + --model $model \ + --result_dir $result_dir \ + --test_config_base_dir=config_files/test_reddit \ + --repeating_action_failure_th 5 --viewport_height 2048 --max_obs_length 3840 \ + --captioning_model $captioning_model \ + --action_set_tag som --observation_type image_som + + # Increment the start and end indices by the batch size + start_idx=$((start_idx + batch_size)) + end_idx=$((end_idx + batch_size)) + + # Ensure the end index does not exceed 466 in the final iteration + if [ $end_idx -gt $max_idx ]; then + end_idx=$max_idx + fi +done diff --git a/VAB-WebArena-Lite/scripts/run_shopping_som.sh b/VAB-WebArena-Lite/scripts/run_shopping_som.sh new file mode 100644 index 0000000..85db772 --- /dev/null +++ b/VAB-WebArena-Lite/scripts/run_shopping_som.sh @@ -0,0 +1,39 @@ +#!/bin/bash +### This script runs the GPT-4V + SoM models on the entire VWA shopping test set. + +model="gpt-4-vision-preview" +result_dir="shopping_gpt4_som" +instruction_path="agent/prompts/jsons/p_som_cot_id_actree_3s.json" + +# Define the batch size variable +batch_size=50 + +# Define the starting and ending indices +start_idx=0 +end_idx=$((start_idx + batch_size)) +max_idx=466 + +# Loop until the starting index is less than or equal to max_idx. 
+while [ $start_idx -le $max_idx ] +do + bash scripts/reset_shopping.sh + bash prepare.sh + python run.py \ + --instruction_path $instruction_path \ + --test_start_idx $start_idx \ + --test_end_idx $end_idx \ + --model $model \ + --result_dir $result_dir \ + --test_config_base_dir=config_files/test_shopping \ + --repeating_action_failure_th 5 --viewport_height 2048 --max_obs_length 3840 \ + --action_set_tag som --observation_type image_som + + # Increment the start and end indices by the batch size + start_idx=$((start_idx + batch_size)) + end_idx=$((end_idx + batch_size)) + + # Ensure the end index does not exceed 466 in the final iteration + if [ $end_idx -gt $max_idx ]; then + end_idx=$max_idx + fi +done diff --git a/VAB-WebArena-Lite/setup.cfg b/VAB-WebArena-Lite/setup.cfg new file mode 100644 index 0000000..0d57c06 --- /dev/null +++ b/VAB-WebArena-Lite/setup.cfg @@ -0,0 +1,25 @@ +[metadata] +name = webarena + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = "test_*.py" + +[options.extras_require] +dev = + pre-commit==3.0.1 + pytest==7.1.2 + mypy==0.991 + nbmake + pytest-asyncio + types-requests + +[options] +python_requires = >=3.7, <4 +packages = + browser_env + agent + evaluation_harness + llms +[mypy] +strict = true \ No newline at end of file diff --git a/VAB-WebArena-Lite/setup.py b/VAB-WebArena-Lite/setup.py new file mode 100644 index 0000000..57c026b --- /dev/null +++ b/VAB-WebArena-Lite/setup.py @@ -0,0 +1,4 @@ +from setuptools import setup + +if __name__ == "__main__": + setup() \ No newline at end of file diff --git a/VAB-WebArena-Lite/tests/conftest.py b/VAB-WebArena-Lite/tests/conftest.py new file mode 100644 index 0000000..b3bd25b --- /dev/null +++ b/VAB-WebArena-Lite/tests/conftest.py @@ -0,0 +1,86 @@ +from typing import AsyncGenerator, Generator + +import pytest +import pytest_asyncio + +from browser_env import AsyncScriptBrowserEnv, ScriptBrowserEnv + +HEADLESS = True +SLOW_MO = 0 + + +@pytest.fixture(scope="function") +def 
script_browser_env() -> Generator[ScriptBrowserEnv, None, None]: + """Create a ScriptBrowserEnv instance for testing. + It is automatically closed after the test session. + This is helpful when the test failed and the browser is still open. + """ + env = ScriptBrowserEnv( + headless=HEADLESS, + slow_mo=SLOW_MO, + ) + yield env + env.close() + + +@pytest.fixture(scope="function") +def current_viewport_script_browser_env() -> Generator[ + ScriptBrowserEnv, None, None +]: + env = ScriptBrowserEnv( + headless=HEADLESS, + slow_mo=SLOW_MO, + current_viewport_only=True, + ) + yield env + env.close() + + +@pytest.fixture(scope="function") +def accessibility_tree_script_browser_env() -> Generator[ + ScriptBrowserEnv, None, None +]: + env = ScriptBrowserEnv( + headless=HEADLESS, + slow_mo=SLOW_MO, + observation_type="accessibility_tree", + ) + yield env + env.close() + + +@pytest.fixture(scope="function") +def accessibility_tree_script_browser_env_with_sleep() -> Generator[ + ScriptBrowserEnv, None, None +]: + env = ScriptBrowserEnv( + headless=HEADLESS, + slow_mo=SLOW_MO, + observation_type="accessibility_tree", + sleep_after_execution=1.0 + ) + yield env + env.close() + + +@pytest.fixture(scope="function") +def accessibility_tree_current_viewport_script_browser_env() -> Generator[ + ScriptBrowserEnv, None, None +]: + env = ScriptBrowserEnv( + headless=HEADLESS, + slow_mo=SLOW_MO, + observation_type="accessibility_tree", + current_viewport_only=True, + ) + yield env + env.close() + + +@pytest_asyncio.fixture(scope="function", autouse=True) +async def async_script_browser_env() -> AsyncGenerator[ + AsyncScriptBrowserEnv, None +]: + env = AsyncScriptBrowserEnv(headless=HEADLESS, slow_mo=SLOW_MO) + yield env + await env.aclose() diff --git a/VAB-WebArena-Lite/tests/test_browser_env/sites/new_tab.html b/VAB-WebArena-Lite/tests/test_browser_env/sites/new_tab.html new file mode 100644 index 0000000..c03963c --- /dev/null +++ 
b/VAB-WebArena-Lite/tests/test_browser_env/sites/new_tab.html @@ -0,0 +1,11 @@ + + + + Sample Page + + +

Welcome to My Website

+

Click the link below to open a new tab:

+ Visit Example.com + + diff --git a/VAB-WebArena-Lite/tests/test_browser_env/test_action_functionalities.py b/VAB-WebArena-Lite/tests/test_browser_env/test_action_functionalities.py new file mode 100644 index 0000000..dde87f1 --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_browser_env/test_action_functionalities.py @@ -0,0 +1,357 @@ +import re +from typing import Dict, Optional, Tuple, Type, Union, cast + +import pytest +from playwright.sync_api import Page, expect + +from browser_env import ( + ScriptBrowserEnv, + create_id_based_action, + create_key_press_action, + create_playwright_action, + create_scroll_action, +) + +HEADLESS = True +SLOW_MO = 0 + + +def test_frame_locator(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://www.littlewebhut.com/articles/html_iframe_example/") + page.frame_locator("iframe[name=\\"imgbox\\"]").get_by_role("img").click()""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_basic(script_browser_env: ScriptBrowserEnv) -> None: + # click, fill, press, check, goto + env = script_browser_env + seq = """page.goto("https://demo.playwright.dev/todomvc/") + page.get_by_placeholder("What needs to be done?").click() + page.get_by_placeholder("What needs to be done?").fill("hello") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_placeholder("What needs to be done?").fill("world") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_placeholder("What needs to be done?").fill("yes") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_placeholder("What needs to be done?").fill("no") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_role("listitem").filter(has_text="world").get_by_role("checkbox", name="Toggle Todo").check() + 
page.get_by_role("button", name="Clear completed").click()""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_hover(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://ianlunn.github.io/Hover/") + page.get_by_role("link", name="Download on GitHub").hover()""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_select_option(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://russmaxdesign.github.io/exercise/#link-two") + page.get_by_role("combobox", name="Favourite mammal").select_option("African Wild Dog")""" + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_xpath(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + + seq = """page.goto("https://demo.playwright.dev/todomvc/") + page.goto("https://demo.playwright.dev/todomvc/#/") + page.get_by_placeholder("What needs to be done?").click() + page.get_by_placeholder("What needs to be done?").fill("hello") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_role("link", name="Completed").click() + page.locator("xpath=/html/body/section/div/header/input").fill("no") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.goto("https://bic-berkeley.github.io/psych-214-fall-2016/string_literals.html") + page.locator("xpath=//*[@id=\'searchbox\']/div/form/input[1]").fill("type")""" + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_inter_page_actions( + 
script_browser_env: ScriptBrowserEnv, +) -> None: + env = script_browser_env + seq = """page.goto("https://demo.playwright.dev/todomvc/") + browser.new_tab() + browser.page_focus(0) + browser.page_focus(1) + page.page_close() + page.goto("https://google.com") + page.goto("https://demo.playwright.dev/todomvc/") + page.go_back() + page.go_forward()""" + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + assert "https://demo.playwright.dev/todomvc" in info["page"].url + + +def test_scroll( + current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + env = current_viewport_script_browser_env + env.reset() + _, success, _, _, _ = env.step(create_scroll_action("down")) + assert success + _, success, _, _, _ = env.step(create_scroll_action("up")) + assert success + + +def test_id_click( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://russmaxdesign.github.io/exercise/")' + ) + ) + assert success + assert "link 'McKenna/Bell'" in obs["text"] + # get the id of the link + element_id = re.search(r"\[(\d+)\] link 'McKenna/Bell'", obs["text"]).group(1) # type: ignore + + obs, success, _, _, info = env.step( + create_id_based_action(f"click [{element_id}]") + ) + assert success + assert ( + info["page"].url + == "https://russmaxdesign.github.io/exercise/#link-four" + ) + + obs, success, _, _, info = env.step(create_scroll_action("down")) + assert "link 'Classification'" in obs["text"] + element_id = re.search(r"\[(\d+)\] link 'Classification'", obs["text"]).group(1) # type: ignore + + obs, success, _, _, info = env.step( + create_id_based_action(f"click [{element_id}]") + ) + assert success + assert ( + info["page"].url + == 
"https://russmaxdesign.github.io/exercise/#link-two" + ) + assert "radio 'Weekly'" in obs["text"] + element_id = re.search(r"\[(\d+)\] radio 'Weekly'", obs["text"]).group(1) # type: ignore + + obs, success, _, _, info = env.step( + create_id_based_action(f"click [{element_id}]") + ) + assert success + assert "radio 'Weekly'" in obs["text"] + + +def test_id_hover( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://ianlunn.github.io/Hover/")' + ) + ) + assert success + assert "link 'Download on GitHub'" in obs["text"] + element_id = re.search(r"\[(\d+)\] link 'Download on GitHub'", obs["text"]).group(1) # type: ignore + + obs, success, _, _, info = env.step( + create_id_based_action(f"hover [{element_id}]") + ) + assert success + + +def test_key_press( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://russmaxdesign.github.io/exercise/")' + ) + ) + assert success + print(obs["text"]) + + assert "StaticText 'Full name'" in obs["text"] + element_id = re.search(r"\[(\d+)\] StaticText 'Full name'", obs["text"]).group(1) # type: ignore + s = "My Name IS XYZ" + + obs, success, _, _, info = env.step( + create_id_based_action(f"type [{element_id}] [{s}] [0]") + ) + + assert success + expect(env.page.get_by_label("Full name")).to_be_focused() + expect(env.page.get_by_label("Full name")).to_have_value(s) + + obs, success, _, _, info = env.step( + create_id_based_action("press [meta+a]") + ) + assert success + + env.page.get_by_label("Full name").type(s) + expect(env.page.get_by_label("Full name")).to_have_value(s) + + obs, success, _, _, info = 
env.step(create_key_press_action("Enter")) + assert success + expect(env.page.get_by_label("Email")).to_be_focused() + + +def test_id_type( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://russmaxdesign.github.io/exercise/")' + ) + ) + assert success + assert "StaticText 'Full name'" in obs["text"] + s = "My Name IS XYZ" + element_id = re.search(r"\[(\d+)\] StaticText 'Full name'", obs["text"]).group(1) # type: ignore + + obs, success, _, _, info = env.step( + create_id_based_action(f"type [{element_id}] [{s}]") + ) + assert success + locator = env.page.get_by_label("Full name") + expect(locator).to_have_value(s) + + +def test_e2e_id_based_actions( + accessibility_tree_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_script_browser_env + env.reset() + obs, *_ = env.step( + create_id_based_action( + "goto [https://russmaxdesign.github.io/exercise/]" + ) + ) + element_id = re.search(r"\[(\d+)\] link 'What are mammals\?'", obs["text"]).group(1) # type: ignore + obs, *_ = env.step(create_id_based_action(f"click [{element_id}]")) + element_id = re.search(r"\[(\d+)\] textbox 'Email'", obs["text"]).group(1) # type: ignore + env.step( + create_id_based_action(f"type [{element_id}] [test@gmail.com] [0]") + ) + env.step(create_id_based_action("scroll [down]")) + env.step(create_id_based_action("scroll [up]")) + env.step(create_id_based_action("new_tab")) + env.step(create_id_based_action("tab_focus [0]")) + env.step(create_id_based_action("tab_focus [1]")) + env.step(create_id_based_action("goto [https://example.com/]")) + env.step(create_id_based_action("go_back")) + x = env.step(create_id_based_action("go_forward")) + assert x[-1]["page"].url == "https://example.com/" + x = env.step(create_id_based_action("tab_focus [0]")) + assert ( + 
x[-1]["page"].url + == "https://russmaxdesign.github.io/exercise/#link-one" + ) + + +def test_id_delete_input( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://russmaxdesign.github.io/exercise/")' + ) + ) + assert success + assert "StaticText 'Full name'" in obs["text"] + s = "My Name IS XYZ" + element_id = re.search(r"\[(\d+)\] StaticText 'Full name'", obs["text"]).group(1) # type: ignore + + obs, success, _, _, info = env.step( + create_id_based_action(f"type [{element_id}] [{s}]") + ) + assert success + locator = env.page.get_by_label("Full name") + expect(locator).to_have_value(s) + + obs, success, _, _, info = env.step( + create_id_based_action(f"click [{element_id}]") + ) + assert success + + obs, success, _, _, info = env.step( + create_id_based_action(f"press [Meta+a]") + ) + assert success + + obs, success, _, _, info = env.step( + create_id_based_action("press [backspace]") + ) + assert success + + new_s = "NEW" + obs, success, _, _, info = env.step( + create_id_based_action(f"type [{element_id}] [{new_s}]") + ) + locator = env.page.get_by_label("Full name") + expect(locator).to_have_value(new_s) + + +@pytest.mark.skip(reason="The current implementation does not deal with iframes") +def test_frame_action( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + """Test when multiple frames are present on the page""" + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://www.in-n-out.com/locations")' + ) + ) + assert success + assert "StaticText 'Search by Address or Zip'" in obs["text"] + element_id = re.search(r"\[(\d+)\] StaticText 'Search by Address or Zip'", obs["text"]).group(1) # type: ignore + + s = 
"random location" + obs, success, _, _, info = env.step( + create_id_based_action(f"type [{element_id}] [{s}]") + ) + assert success + assert s in obs["text"] diff --git a/VAB-WebArena-Lite/tests/test_browser_env/test_actions.py b/VAB-WebArena-Lite/tests/test_browser_env/test_actions.py new file mode 100644 index 0000000..332a32b --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_browser_env/test_actions.py @@ -0,0 +1,87 @@ +import numpy as np + +from browser_env import * + + +def test_is_equivalent() -> None: + for action_type in ActionTypes.__members__.values(): + action_a = create_random_action() + action_b = create_random_action() + if action_a["action_type"] != action_b["action_type"]: + assert not is_equivalent(action_a, action_b) + action_a["action_type"] = action_type + action_b["action_type"] = action_type + match action_type: + case ActionTypes.MOUSE_CLICK | ActionTypes.MOUSE_HOVER: + if not np.allclose(action_a["coords"], action_b["coords"]): + assert not is_equivalent(action_a, action_b) + action_a["coords"] = action_b["coords"] + assert is_equivalent(action_a, action_b) + case ActionTypes.KEYBOARD_TYPE: + if action_a["text"] != action_b["text"]: + assert not is_equivalent(action_a, action_b) + action_a["text"] = action_b["text"] + assert is_equivalent(action_a, action_b) + case ActionTypes.CLICK | ActionTypes.HOVER | ActionTypes.TYPE: + if action_a["element_id"] and action_b["element_id"]: + if action_a["element_id"] != action_b["element_id"]: + assert not is_equivalent(action_a, action_b) + action_a["element_id"] = action_b["element_id"] + assert is_equivalent(action_a, action_b) + elif action_a["element_id"] and action_b["element_id"]: + if action_a["element_role"] != action_b["element_role"]: + assert not is_equivalent(action_a, action_b) + action_a["element_role"] = action_b["element_role"] + if action_a["element_name"] != action_b["element_name"]: + assert not is_equivalent(action_a, action_b) + action_a["element_name"] = action_b["element_name"] + 
assert is_equivalent(action_a, action_b) + elif action_a["pw_code"] and action_b["pw_code"]: + if action_a["pw_code"] != action_b["pw_code"]: + assert not is_equivalent(action_a, action_b) + action_a["pw_code"] = action_b["pw_code"] + assert is_equivalent(action_a, action_b) + else: + action_a["element_id"] = action_b["element_id"] + assert is_equivalent(action_a, action_b) + case ActionTypes.GOTO_URL: + if action_a["url"] != action_b["url"]: + assert not is_equivalent(action_a, action_b) + action_a["url"] = action_b["url"] + assert is_equivalent(action_a, action_b) + case ActionTypes.PAGE_FOCUS: + if action_a["page_number"] != action_b["page_number"]: + assert not is_equivalent(action_a, action_b) + action_a["page_number"] = action_b["page_number"] + assert is_equivalent(action_a, action_b) + case ActionTypes.SCROLL: + da = "up" if "up" in action_a["direction"] else "down" + db = "up" if "up" in action_b["direction"] else "down" + if da != db: + assert not is_equivalent(action_a, action_b) + action_a["direction"] = action_b["direction"] + assert is_equivalent(action_a, action_b) + case ActionTypes.KEY_PRESS: + if action_a["key_comb"] != action_b["key_comb"]: + assert not is_equivalent(action_a, action_b) + action_a["key_comb"] = action_b["key_comb"] + assert is_equivalent(action_a, action_b) + case ActionTypes.CHECK | ActionTypes.SELECT_OPTION: + if action_a["pw_code"] != action_b["pw_code"]: + assert not is_equivalent(action_a, action_b) + action_a["pw_code"] = action_b["pw_code"] + assert is_equivalent(action_a, action_b) + case ActionTypes.STOP: + if action_a["answer"] != action_b["answer"]: + assert not is_equivalent(action_a, action_b) + action_a["answer"] = action_b["answer"] + assert is_equivalent(action_a, action_b) + case _: + assert is_equivalent(action_a, action_b) + + +def test_action2create_function() -> None: + for _ in range(1000): + action = create_random_action() + create_function = action2create_function(action) + assert is_equivalent(action, 
eval(create_function)) diff --git a/VAB-WebArena-Lite/tests/test_browser_env/test_auth_cookie.py b/VAB-WebArena-Lite/tests/test_browser_env/test_auth_cookie.py new file mode 100644 index 0000000..2456a7a --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_browser_env/test_auth_cookie.py @@ -0,0 +1,67 @@ +import asyncio +import json + +from browser_env import * + +auth_json = { + "cookies": [ + { + "name": "session-username", + "value": "standard_user", + "domain": "www.saucedemo.com", + "path": "/", + "httpOnly": False, + "secure": False, + "sameSite": "Lax", + } + ], + "origins": [], +} + + +def test_auth_cookie() -> None: + env = ScriptBrowserEnv() + env.reset() + _, reward, _, _, info = env.step( + create_goto_url_action("https://www.saucedemo.com/inventory.html"), + ) + assert reward == 1 + assert "page" in info and isinstance(info["page"], DetachedPage) + assert info["page"].url == "https://www.saucedemo.com/" + json.dump(auth_json, open("/tmp/auth.json", "w")) + instance_config = {"storage_state": "/tmp/auth.json"} + json.dump(instance_config, open("/tmp/config.json", "w")) + env.reset(options={"config_file": "/tmp/config.json"}) + _, reward, _, _, info = env.step( + create_goto_url_action("https://www.saucedemo.com/inventory.html"), + ) + assert reward == 1 + assert "page" in info and isinstance(info["page"], DetachedPage) + assert info["page"].url == "https://www.saucedemo.com/inventory.html" + env.close() + + +def test_async_auth_cookie() -> None: + env = AsyncScriptBrowserEnv() + + async def _test() -> None: + await env.areset() + _, reward, _, _, info = await env.astep( + create_goto_url_action("https://www.saucedemo.com/inventory.html"), + ) + assert reward == 1 + assert "page" in info and isinstance(info["page"], DetachedPage) + assert info["page"].url == "https://www.saucedemo.com/" + json.dump(auth_json, open("/tmp/auth.json", "w")) + instance_config = {"storage_state": "/tmp/auth.json"} + json.dump(instance_config, open("/tmp/config.json", "w")) + 
await env.areset(options={"config_file": "/tmp/config.json"}) + _, reward, _, _, info = await env.astep( + create_goto_url_action("https://www.saucedemo.com/inventory.html"), + ) + assert reward == 1 + assert "page" in info and isinstance(info["page"], DetachedPage) + assert info["page"].url == "https://www.saucedemo.com/inventory.html" + await env.aclose() + + asyncio.run(_test()) diff --git a/VAB-WebArena-Lite/tests/test_browser_env/test_playwright_actions.py b/VAB-WebArena-Lite/tests/test_browser_env/test_playwright_actions.py new file mode 100644 index 0000000..ce55eeb --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_browser_env/test_playwright_actions.py @@ -0,0 +1,89 @@ +from typing import Dict, Generator, Optional, Tuple, Type, Union, cast + +import pytest +from playwright.sync_api import Page + +from browser_env import ScriptBrowserEnv, create_playwright_action + +HEADLESS = True +SLOW_MO = 0 + + +def test_frame_locator(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://www.littlewebhut.com/articles/html_iframe_example/") + page.frame_locator("iframe[name=\\"imgbox\\"]").get_by_role("img").click()""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_basic(script_browser_env: ScriptBrowserEnv) -> None: + # click, fill, press, check, goto + env = script_browser_env + seq = """page.goto("https://demo.playwright.dev/todomvc/") + page.get_by_placeholder("What needs to be done?").click() + page.get_by_placeholder("What needs to be done?").fill("hello") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_placeholder("What needs to be done?").fill("world") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_placeholder("What needs to be done?").fill("yes") + page.get_by_placeholder("What needs to be done?").press("Enter") + 
page.get_by_placeholder("What needs to be done?").fill("no") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_role("listitem").filter(has_text="world").get_by_role("checkbox", name="Toggle Todo").check() + page.get_by_role("button", name="Clear completed").click()""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +@pytest.mark.skip(reason="not important, but the site is flaky") +def test_hover(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://www.w3schools.com/cssref/tryit.php?filename=trycss_sel_hover") + page.frame_locator("iframe[name=\\'iframeResult\\']").get_by_role("link", name="w3schools.com").hover()""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +@pytest.mark.skip(reason="not important, but the site is flaky") +def test_select_option(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_select") + page.frame_locator("iframe[name=\\'iframeResult\\']").get_by_role("combobox", name="Choose a car:").select_option("opel")""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_xpath(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://demo.playwright.dev/todomvc/") + page.goto("https://demo.playwright.dev/todomvc/#/") + page.get_by_placeholder("What needs to be done?").click() + page.get_by_placeholder("What needs to be done?").fill("hello") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_role("link", name="Completed").click() + 
page.locator("xpath=/html/body/section/div/header/input").fill("no") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.goto("https://bic-berkeley.github.io/psych-214-fall-2016/string_literals.html") + page.locator("xpath=//*[@id=\'searchbox\']/div/form/input[1]").fill("type")""" + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success diff --git a/VAB-WebArena-Lite/tests/test_browser_env/test_script_browser_env.py b/VAB-WebArena-Lite/tests/test_browser_env/test_script_browser_env.py new file mode 100644 index 0000000..3d14070 --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_browser_env/test_script_browser_env.py @@ -0,0 +1,292 @@ +import asyncio +import collections +import json +import os +import tempfile +from typing import Callable, Dict, Optional, Tuple, Type, Union, cast + +import pytest +from gymnasium.vector import AsyncVectorEnv +from playwright.sync_api import Page + +from browser_env import ( + Action, + AsyncScriptBrowserEnv, + DetachedPage, + ScriptBrowserEnv, + create_focus_and_click_action, + create_goto_url_action, + create_keyboard_type_action, + create_playwright_action, + create_scroll_action, +) +from browser_env.actions import create_id_based_action +from browser_env.env_config import ACCOUNTS, REDDIT, SHOPPING + +@pytest.mark.skip(reason="The actions are deprecated") +def test_script_browser_env(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + env.reset() + env.step( + create_goto_url_action("http://www.example.com"), + ) + env.step( + create_focus_and_click_action( + element_role="link", + element_name="More", + ), + ) + _, _, _, _, info = env.step( + create_focus_and_click_action( + element_role="link", + element_name="2606", + ) + ) + assert isinstance(info["page"], DetachedPage) + assert info["page"].url == "https://www.rfc-editor.org/rfc/rfc2606.html" + + +@pytest.mark.skip(reason="Async 
not supported") +async def test_async_script_browser_env( + async_script_browser_env: AsyncScriptBrowserEnv, +) -> None: + env = async_script_browser_env + await env.areset() + await env.astep( + create_goto_url_action("http://www.example.com"), + ) + await env.astep( + create_focus_and_click_action( + element_role="link", + element_name="More", + ), + ) + _, _, _, _, info = await env.astep( + create_focus_and_click_action( + element_role="link", + element_name="2606", + ) + ) + assert isinstance(info["page"], DetachedPage) + assert info["page"].url == "https://www.rfc-editor.org/rfc/rfc2606.html" + + +def collate_actions(actions: list[Action]) -> dict[str, list[object]]: + action_dict = collections.defaultdict(list) + for action in actions: + for key, value in action.items(): + action_dict[key].append(value) + return action_dict + + +@pytest.mark.skip(reason="Gym doesn't support self-defined observations") +def test_parallel_script_browser_env() -> None: + vector_env = AsyncVectorEnv( + [ + lambda: ScriptBrowserEnv(), + lambda: ScriptBrowserEnv(), + ], + shared_memory=True, + ) + vector_env.reset() + vector_env.step( + collate_actions( + [ + create_goto_url_action("http://www.example.com"), + ] + * 2 + ) + ) + vector_env.step( + collate_actions( + [ + create_focus_and_click_action( + element_role="link", + element_name="More", + ), + ] + * 2 + ) + ) + _, _, _, _, info = vector_env.step( + collate_actions( + [ + create_focus_and_click_action( + element_role="link", + element_name="2606", + ), + create_focus_and_click_action( + element_role="link", + element_name="6761", + ), + ] + ) + ) + # assert is_bearable(info["page"].tolist(), list[DetachedPage]) + assert info["page"][0].url == "https://www.rfc-editor.org/rfc/rfc2606.html" + assert info["page"][1].url == "https://www.rfc-editor.org/rfc/rfc6761.html" + vector_env.close() # type: ignore[no-untyped-call] + + +def test_focus_placeholder_and_label( + script_browser_env: ScriptBrowserEnv, +) -> None: + env = 
script_browser_env + env.reset() + for action in [ + create_goto_url_action("https://demo.applitools.com"), + create_focus_and_click_action("placeholder", "Enter your username"), + create_keyboard_type_action("abc"), + create_focus_and_click_action("placeholder", "Enter your password"), + create_keyboard_type_action("123"), + create_focus_and_click_action("label", "Remember Me"), + create_focus_and_click_action("link", "Sign in"), + ]: + _, success, _, _, info = env.step(action) + assert success + assert info["page"].url == "https://demo.applitools.com/app.html" + + +def test_html_current_viewport( + current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + s1 = "detailed information about how mammals could be classified." + s2 = "Types of mammals" + env = current_viewport_script_browser_env + env.reset() + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://russmaxdesign.github.io/exercise/")' + ) + ) + assert success + assert s1 in obs["text"] and s2 not in obs["text"] + obs, success, _, _, info = env.step(create_scroll_action("down")) + assert success + assert s1 not in obs["text"] and s2 in obs["text"] + + +def test_accessibility_tree( + accessibility_tree_script_browser_env: ScriptBrowserEnv, +) -> None: + s1 = "checkbox 'Yes'" + s2 = "button 'Submit'" + env = accessibility_tree_script_browser_env + env.reset() + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://russmaxdesign.github.io/exercise/")' + ) + ) + assert success + assert s1 in obs["text"] and s2 in obs["text"] + + +def test_accessibility_tree_viewport( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + s1 = "combobox 'Favourite mammal'" + s2 = "gridcell 'Canyon bat'" + s3 = "heading 'Useful links'" + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + + obs, success, _, _, info = env.step( + create_playwright_action( + 
'page.goto("https://russmaxdesign.github.io/exercise/")' + ) + ) + assert success + assert ( + s1 in obs["text"] and s2 not in obs["text"] and s3 not in obs["text"] + ) + obs, success, _, _, info = env.step(create_scroll_action("down")) + assert success + assert ( + s1 not in obs["text"] and s2 in obs["text"] and s3 not in obs["text"] + ) + + obs, success, _, _, info = env.step(create_scroll_action("down")) + assert success + assert s1 not in obs["text"] and s2 in obs["text"] and s3 in obs["text"] + + +def test_multiple_start_url(script_browser_env: ScriptBrowserEnv) -> None: + temp_config = tempfile.NamedTemporaryFile("w", delete=False) + config = { + "require_login": False, + "start_url": f"{REDDIT} |AND| {REDDIT}/forums", + } + json.dump(config, temp_config) + temp_config.close() + + env = script_browser_env + env.reset(options={"config_file": temp_config.name}) + assert len(env.context.pages) == 2 + assert env.context.pages[0].url == f"{REDDIT}/" + assert env.context.pages[1].url == f"{REDDIT}/forums", env.context.pages[ + 1 + ].url + + +def test_observation_tab_information( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + obs, *_ = env.step( + create_id_based_action( + "goto [https://russmaxdesign.github.io/exercise/]" + ) + ) + obs, *_ = env.step(create_id_based_action("new_tab")) + + obs, *_ = env.step( + create_id_based_action("goto [https:///www.google.com]") + ) + assert obs["text"].startswith( # type: ignore[union-attr] + "Tab 0: Exercise page for keyboard and screen reader use | Tab 1 (current): Google" + ) + + obs, *_ = env.step(create_id_based_action("tab_focus [0]")) + + assert obs["text"].startswith( # type: ignore[union-attr] + "Tab 0 (current): Exercise page for keyboard and screen reader use | Tab 1: Google" + ) + + +def test_accessibility_tree_observation_update( + accessibility_tree_current_viewport_script_browser_env: 
ScriptBrowserEnv, +) -> None: + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + obs, *_ = env.step( + create_playwright_action( + "page.goto('https://russmaxdesign.github.io/exercise/')" + ) + ) + obs, *_ = env.step( + create_playwright_action( + 'page.get_by_label("Full name").fill("UNIQUE_NAME")' + ) + ) + assert "UNIQUE_NAME" in obs["text"] + + +def test_click_open_new_tab( + accessibility_tree_script_browser_env_with_sleep: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_script_browser_env_with_sleep + env.reset() + env.step( + create_playwright_action( + f"page.goto('file:///{os.getcwd()}/tests/test_browser_env/sites/new_tab.html')" + ) + ) + obs, *_, info = env.step( + create_playwright_action( + 'page.get_by_role("link", name="Visit Example.com").click()' + ) + ) + assert "heading 'Example Domain'" in obs["text"] + assert "www.example.com" in info['page'].url diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_failure.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_failure.json new file mode 100644 index 0000000..297916c --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_failure.json @@ -0,0 +1,30 @@ +{ + "sites": ["reddit"], + "task_id": 0, + "require_login": true, + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Make a Reddit post with this image ({{image_url}}) in the most relevant subreddit.", + "intent": "Make a Reddit post with this image (__REDDIT__/submission_images/58fd8145d1b2dfa2dfddb2f29c481e66d38c487cc1b2e1d613cb086b0c1caaad.png) in the most relevant subreddit.", + "image": "__REDDIT__/submission_images/58fd8145d1b2dfa2dfddb2f29c481e66d38c487cc1b2e1d613cb086b0c1caaad.png", + "instantiation_dict": { + "image_url": "__REDDIT__/submission_images/58fd8145d1b2dfa2dfddb2f29c481e66d38c487cc1b2e1d613cb086b0c1caaad.png" + }, + "require_reset": false, + "eval": { + "eval_types": [ + 
"url_match", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/funny/125610/guilty-oc", + "url_note": "GOLD in PRED", + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "__REDDIT__/submission_images/58fd8145d1b2dfa2dfddb2f29c481e66d38c487cc1b2e1d613cb086b0c1caaad.png" + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_resize_failure_1.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_resize_failure_1.json new file mode 100644 index 0000000..c28b16c --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_resize_failure_1.json @@ -0,0 +1,30 @@ +{ + "sites": ["reddit"], + "task_id": 0, + "require_login": true, + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Make a Reddit post with this image ({{image_url}}) in the most relevant subreddit.", + "intent": "Make a Reddit post with this image (__REDDIT__/submission_images/5ab78381e020c33a6a221266260ca9811d6ad7af36df15b5edbf6b5c8f0d6280.jpg) in the most relevant subreddit.", + "image": "__REDDIT__/submission_images/5ab78381e020c33a6a221266260ca9811d6ad7af36df15b5edbf6b5c8f0d6280.jpg", + "instantiation_dict": { + "image_url": "__REDDIT__/submission_images/5ab78381e020c33a6a221266260ca9811d6ad7af36df15b5edbf6b5c8f0d6280.jpg" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/funny/125684/neighborhood-watch", + "url_note": "GOLD in PRED", + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "__REDDIT__/submission_images/5ab78381e020c33a6a221266260ca9811d6ad7af36df15b5edbf6b5c8f0d6280.jpg" + } + ] + } +} diff --git 
a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_resize_failure_2.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_resize_failure_2.json new file mode 100644 index 0000000..c28b16c --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_resize_failure_2.json @@ -0,0 +1,30 @@ +{ + "sites": ["reddit"], + "task_id": 0, + "require_login": true, + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Make a Reddit post with this image ({{image_url}}) in the most relevant subreddit.", + "intent": "Make a Reddit post with this image (__REDDIT__/submission_images/5ab78381e020c33a6a221266260ca9811d6ad7af36df15b5edbf6b5c8f0d6280.jpg) in the most relevant subreddit.", + "image": "__REDDIT__/submission_images/5ab78381e020c33a6a221266260ca9811d6ad7af36df15b5edbf6b5c8f0d6280.jpg", + "instantiation_dict": { + "image_url": "__REDDIT__/submission_images/5ab78381e020c33a6a221266260ca9811d6ad7af36df15b5edbf6b5c8f0d6280.jpg" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/funny/125684/neighborhood-watch", + "url_note": "GOLD in PRED", + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "__REDDIT__/submission_images/5ab78381e020c33a6a221266260ca9811d6ad7af36df15b5edbf6b5c8f0d6280.jpg" + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_resize_template.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_resize_template.json new file mode 100644 index 0000000..ed60541 --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_resize_template.json @@ -0,0 +1,31 @@ +{ + "sites": ["reddit"], + "task_id": 0, + "require_login": true, + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + 
"intent_template": "Make a Reddit post with this image ({{image_url}}) in the most relevant subreddit.", + "intent": "Make a Reddit post with this image (__REDDIT__/submission_images/66a1b3d1334e5f2ec04fdab4d0fa4892744e9dda6c38b5d3ae971d7dc2c1dfdb.jpg) in the most relevant subreddit.", + "image": "images", + "instantiation_dict": { + "image_url": "__REDDIT__/submission_images/66a1b3d1334e5f2ec04fdab4d0fa4892744e9dda6c38b5d3ae971d7dc2c1dfdb.jpg" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "page_image_query" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/food/18838/homemade-obligatory-halloween-pumpkin-loaf", + "url_note": "GOLD in PRED", + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "", + "ssim_threshold": 0.95 + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_success.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_success.json new file mode 100644 index 0000000..7bc0a5e --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/exact_image_success.json @@ -0,0 +1,30 @@ +{ + "sites": ["reddit"], + "task_id": 0, + "require_login": true, + "start_url": "__REDDIT__/forums/all", + "geolocation": null, + "intent_template": "Make a Reddit post with this image ({{image_url}}) in the most relevant subreddit.", + "intent": "Make a Reddit post with this image (__REDDIT__/submission_images/58fd8145d1b2dfa2dfddb2f29c481e66d38c487cc1b2e1d613cb086b0c1caaad.png) in the most relevant subreddit.", + "image": "__REDDIT__/submission_images/58fd8145d1b2dfa2dfddb2f29c481e66d38c487cc1b2e1d613cb086b0c1caaad.png", + "instantiation_dict": { + "image_url": "__REDDIT__/submission_images/58fd8145d1b2dfa2dfddb2f29c481e66d38c487cc1b2e1d613cb086b0c1caaad.png" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "page_image_query" + ], + 
"reference_answers": null, + "reference_url": "__REDDIT__/f/funny/125684/neighborhood-watch", + "url_note": "GOLD in PRED", + "page_image_query": [ + { + "eval_image_url": "last", + "eval_image_class": ".submission__image", + "eval_fuzzy_image_match": "__REDDIT__/submission_images/58fd8145d1b2dfa2dfddb2f29c481e66d38c487cc1b2e1d613cb086b0c1caaad.png" + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/func_eval_fail.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/func_eval_fail.json new file mode 100644 index 0000000..0ffdd0a --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/func_eval_fail.json @@ -0,0 +1,29 @@ +{ + "sites": ["shopping"], + "task_id": 0, + "require_login": true, + "storage_state": null, + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["program_html"], + "reference_answers": [], + "reference_url": "", + "program_html": [ + { + "url": "last", + "required_contents": {"must_include": ["80"]}, + "locator": "func:shopping_get_sku_latest_review_rating('B09BCM56J7')" + }, + { + "url": "last", + "required_contents": {"must_include": ["cupcakecupcake"]}, + "locator": "func:shopping_get_sku_latest_review_author('B09BCM56J7')" + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/func_eval_success.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/func_eval_success.json new file mode 100644 index 0000000..d3d3df8 --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/func_eval_success.json @@ -0,0 +1,29 @@ +{ + "sites": ["shopping"], + "task_id": 0, + "require_login": true, + "storage_state": null, + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["program_html"], + "reference_answers": [], + 
"reference_url": "", + "program_html": [ + { + "url": "last", + "required_contents": {"must_include": ["100"]}, + "locator": "func:shopping_get_sku_latest_review_rating('B09BCM56J7')" + }, + { + "url": "last", + "required_contents": {"must_include": ["cupcakecupcake"]}, + "locator": "func:shopping_get_sku_latest_review_author('B09BCM56J7')" + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/func_url_func_1.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/func_url_func_1.json new file mode 100644 index 0000000..7dbd8a2 --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/func_url_func_1.json @@ -0,0 +1,24 @@ +{ + "sites": ["shopping"], + "task_id": 0, + "require_login": true, + "storage_state": null, + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["program_html"], + "reference_answers": [], + "reference_url": "", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": {"must_include": ["​"]} + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/func_url_func_2.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/func_url_func_2.json new file mode 100644 index 0000000..bcb776d --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/func_url_func_2.json @@ -0,0 +1,33 @@ +{ + "sites": [ + "gitlab" + ], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": [], + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/primer/design/-/project_members", + 
"locator": "func:gitlab_get_project_memeber_role(__page__, 'byteblaze')", + "required_contents": {"must_include": ["Developer"]} + }, + { + "url": "__GITLAB__/primer/design/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'primer')", + "required_contents": {"must_include": ["Owner"]} + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_content_element_exact_match.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_content_element_exact_match.json new file mode 100644 index 0000000..2b89bfa --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_content_element_exact_match.json @@ -0,0 +1,29 @@ +{ + "sites": ["shopping"], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["program_html"], + "reference_answers": [], + "reference_url": "", + "program_html": [ + { + "url": "last", + "required_contents": {"must_include": ["Hello World"], "must_exclude": ["Goodbye World"]}, + "locator": "document.querySelector('[id=\"form-name\"').value" + }, + { + "url": "last", + "required_contents": {"must_include": ["alexisxy@hotmail.com"], "must_exclude": ["alexisxy@gmail.com"]}, + "locator": "document.querySelector('[id=\"form-email\"').value" + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_content_exact_match.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_content_exact_match.json new file mode 100644 index 0000000..4f4ad1f --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_content_exact_match.json @@ -0,0 +1,29 @@ +{ + "sites": ["shopping"], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": null, + "geolocation": null, + 
"intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["program_html"], + "reference_answers": [], + "reference_url": "", + "program_html": [ + { + "url": "last", + "required_contents": {"must_include": ["What are mammals?"], "must_exclude": ["What are birds?"]}, + "locator": "" + }, + { + "url": "https://www.google.com/", + "required_contents": {"must_include": ["Google Search"], "must_exclude": ["Bing Search"]}, + "locator": "" + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_content_url_comb.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_content_url_comb.json new file mode 100644 index 0000000..f419706 --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_content_url_comb.json @@ -0,0 +1,30 @@ +{ + "sites": ["shopping"], + "task_id": 0, + "require_login": true, + "storage_state": null, + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["program_html", "url_match"], + "reference_answers": [], + "reference_url": "https://russmaxdesign.github.io/", + "url_note": "GOLD in PRED", + "program_html": [ + { + "url": "last", + "required_contents": {"must_include": ["Hello World"], "must_exclude": ["Goodbye World"]}, + "locator": "document.querySelector('[id=\"form-name\"').value" + }, + { + "url": "last", + "required_contents": {"must_include": ["alexisxy@hotmail.com"], "must_exclude": ["alexisxy@gmail.com"]}, + "locator": "document.querySelector('[id=\"form-email\"').value" + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_required_values_failure_1.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_required_values_failure_1.json new file mode 100644 index 0000000..7198760 --- /dev/null +++ 
b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_required_values_failure_1.json @@ -0,0 +1,57 @@ +{ + "sites": [ + "shopping" + ], + "task_id": 0, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": null, + "geolocation": null, + "intent_template": "Find me {{category}} that has the same brand as products in one of the following picture. It needs to {{description}}.", + "intent": "Find me a digital camera that has the same brand as products in one of the following picture. It needs to be black and have at least 12 reviews. Furthermore, it cannot be too expensive - it needs to have price less than $700.", + "image": [ + "https://images.pexels.com/photos/1022187/pexels-photo-1022187.jpeg", + "https://images.pexels.com/photos/89551/pexels-photo-89551.jpeg" + ], + "instantiation_dict": { + "category": "a digital camera", + "description": "be black and have at least 12 reviews. Furthermore, it cannot be too expensive - it needs to have price less than $700" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": [], + "reference_url": "__SHOPPING__/olympus-om-d-e-m1-16mp-mirrorless-digital-camera-with-3-inch-lcd-body-only-silver-w-black-trim.html", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_num_reviews(__page__)", + "required_contents": { + "required_values": [ + ">= 12" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_product_price(__page__)", + "required_contents": { + "required_values": [ + "< 600" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_product_attributes(__page__, 'manufacturer |OR| brand name')", + "required_contents": { + "must_include": [ + "Olympus |OR| Nikon" + ] + } + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_required_values_success_1.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_required_values_success_1.json new file 
mode 100644 index 0000000..d222564 --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/html_required_values_success_1.json @@ -0,0 +1,60 @@ +{ + "sites": [ + "shopping" + ], + "task_id": 0, + "require_login": false, + "storage_state": "./.auth/shopping_state.json", + "start_url": null, + "geolocation": null, + "intent_template": "Find me {{category}} that has the same brand as products in one of the following picture. It needs to {{description}}.", + "intent": "Find me a digital camera that has the same brand as products in one of the following picture. It needs to be black and have at least 12 reviews. Furthermore, it cannot be too expensive - it needs to have price less than $700.", + "image": [ + "https://images.pexels.com/photos/1022187/pexels-photo-1022187.jpeg", + "https://images.pexels.com/photos/89551/pexels-photo-89551.jpeg" + ], + "instantiation_dict": { + "category": "a digital camera", + "description": "be black and have at least 12 reviews. Furthermore, it cannot be too expensive - it needs to have price less than $700" + }, + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": [], + "reference_url": "__SHOPPING__/olympus-om-d-e-m1-16mp-mirrorless-digital-camera-with-3-inch-lcd-body-only-silver-w-black-trim.html", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_num_reviews(__page__)", + "required_contents": { + "required_values": [ + ">= 12" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_product_price(__page__)", + "required_contents": { + "required_values": [ + "< 700" + ] + } + }, + { + "url": "last", + "locator": "func:shopping_get_product_attributes(__page__, 'manufacturer |OR| brand name')", + "required_contents": { + "must_include": [ + "Olympus |OR| Nikon" + ], + "must_exclude": [ + "Canon" + ] + } + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/image_evaluator_yes.json 
b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/image_evaluator_yes.json new file mode 100644 index 0000000..157df2a --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/image_evaluator_yes.json @@ -0,0 +1,45 @@ +{ + "sites": [ + "shopping" + ], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Bring me to the produce page of something like this.", + "intent": "Bring me to the produce page of something like this.", + "image": "https://images.pexels.com/photos/837267/pexels-photo-837267.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "__SHOPPING__/artificial-cactus-fake-cactus-16-faux-cacti-plants-for-home-garden-office-store-decoration.html", + "eval_image_class": ".fotorama__stage__frame", + "eval_vqa": [ + { + "question": "Is this a cactus? (yes/no)", + "answer": "yes" + }, + { + "question": "Is this a potted plant? 
(yes/no)", + "answer": "yes" + } + ] + } + ], + "reference_url": "__SHOPPING__/artificial-cactus-fake-cactus-16-faux-cacti-plants-for-home-garden-office-store-decoration.html", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 0, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "__SHOPPING__/artificial-cactus-fake-cactus-16-faux-cacti-plants-for-home-garden-office-store-decoration.html", + "overall_difficulty": "medium" +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/image_evaluator_yes_direct_img.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/image_evaluator_yes_direct_img.json new file mode 100644 index 0000000..181278b --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/image_evaluator_yes_direct_img.json @@ -0,0 +1,45 @@ +{ + "sites": [ + "shopping" + ], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Bring me to the produce page of something like this.", + "intent": "Bring me to the produce page of something like this.", + "image": "https://images.pexels.com/photos/837267/pexels-photo-837267.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "__SHOPPING__/artificial-cactus-fake-cactus-16-faux-cacti-plants-for-home-garden-office-store-decoration.html", + "eval_image_class": ".fotorama__img", + "eval_vqa": [ + { + "question": "Is this a cactus? (yes/no)", + "answer": "yes" + }, + { + "question": "Is this a potted plant? 
(yes/no)", + "answer": "yes" + } + ] + } + ], + "reference_url": "__SHOPPING__/artificial-cactus-fake-cactus-16-faux-cacti-plants-for-home-garden-office-store-decoration.html", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 0, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "__SHOPPING__/artificial-cactus-fake-cactus-16-faux-cacti-plants-for-home-garden-office-store-decoration.html", + "overall_difficulty": "medium" +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/image_evaluator_yes_no.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/image_evaluator_yes_no.json new file mode 100644 index 0000000..136782f --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/image_evaluator_yes_no.json @@ -0,0 +1,45 @@ +{ + "sites": [ + "shopping" + ], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Bring me to the produce page of something like this.", + "intent": "Bring me to the produce page of something like this.", + "image": "https://images.pexels.com/photos/837267/pexels-photo-837267.jpeg", + "instantiation_dict": {}, + "require_reset": false, + "eval": { + "eval_types": [ + "page_image_query" + ], + "page_image_query": [ + { + "eval_image_url": "__SHOPPING__/artificial-cactus-fake-cactus-16-faux-cacti-plants-for-home-garden-office-store-decoration.html", + "eval_image_class": ".fotorama__img", + "eval_vqa": [ + { + "question": "Is this a cactus? (yes/no)", + "answer": "yes" + }, + { + "question": "Is this a potted plant? 
(yes/no)", + "answer": "no" + } + ] + } + ], + "reference_url": "__SHOPPING__/artificial-cactus-fake-cactus-16-faux-cacti-plants-for-home-garden-office-store-decoration.html", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "" + }, + "intent_template_id": 0, + "reasoning_difficulty": "easy", + "visual_difficulty": "medium", + "comments": "__SHOPPING__/artificial-cactus-fake-cactus-16-faux-cacti-plants-for-home-garden-office-store-decoration.html", + "overall_difficulty": "medium" +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/string_match.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/string_match.json new file mode 100644 index 0000000..dc38fb0 --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/string_match.json @@ -0,0 +1,21 @@ +{ + "sites": ["reddit"], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["string_match"], + "reference_answers": { + "must_include": ["1985/04/18"], + "must_exclude": ["1985/04/17"] + }, + "reference_url": "", + "program_html": null + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/url_exact_match.json b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/url_exact_match.json new file mode 100644 index 0000000..1c29f4d --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/configs/url_exact_match.json @@ -0,0 +1,23 @@ +{ + "sites": ["reddit"], + "task_id": 0, + "require_login": true, + "storage_state": null, + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["url_match"], + "reference_answers": [], + "reference_url": "https://www.google.com/", + "program_html": [ + { + "url": "", 
+ "required_contents": [] + } + ] + } +} diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/test_exact_evaluators.py b/VAB-WebArena-Lite/tests/test_evaluation_harness/test_exact_evaluators.py new file mode 100644 index 0000000..0fbf735 --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/test_exact_evaluators.py @@ -0,0 +1,596 @@ +import json +import os +import random +from glob import glob +from pathlib import Path +from typing import Any + +import pytest +import requests +from PIL import Image +from py import test + +from agent import Agent, TeacherForcingAgent +from browser_env import ActionTypes, ScriptBrowserEnv +from browser_env.env_config import * +from evaluation_harness import ( + HTMLContentExactEvaluator, + PageImageEvaluator, + StringEvaluator, + URLExactEvaluator, + image_utils, +) +from evaluation_harness.evaluators import EvaluatorComb + +IN_GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true" +HEADLESS = True +config_file_folder = "tests/test_evaluation_harness/configs" + + +def tf_roll_out( + agent: Agent, env: ScriptBrowserEnv, config_file: str +) -> list[Any]: + """Roll out the agent using teacher forcing actions""" + obs, state_info = env.reset(options={"config_file": config_file}) + + trajectory: list[Any] = [{"observation": obs, "info": state_info}] + while True: + action = agent.next_action( + trajectory=trajectory, intent="", meta_data={} + ) + trajectory.append(action) + if action["action_type"] == ActionTypes.STOP: + break + + # preceed to next action + obs, reward, terminated, truncated, info = env.step(action) + state_info = {"observation": obs, "info": info} + trajectory.append(state_info) + + return trajectory + + +def test_string_match_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/string_match.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = """page.stop("The date is 1985/04/18")""" + 
agent.set_actions(action_seq) + + env = script_browser_env + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = StringEvaluator() + score = evalutor( + trajectory, config_file, env.page + ) + + assert score == 1.0 + + +def test_string_match_fail(script_browser_env: ScriptBrowserEnv) -> None: + config_file = f"{config_file_folder}/string_match.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = """page.stop("The date is 1936/04/18")""" + agent.set_actions(action_seq) + + env = script_browser_env + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = StringEvaluator() + score = evalutor( + trajectory, config_file, env.page + ) + + assert score == 0.0 + + +def test_url_exact_match_success(script_browser_env: ScriptBrowserEnv) -> None: + config_file = f"{config_file_folder}/url_exact_match.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("https://www.google.com/") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = URLExactEvaluator() + score = evalutor( + trajectory, config_file, env.page + ) + assert score == 1.0 + + +def test_url_exact_match_fail(script_browser_env: ScriptBrowserEnv) -> None: + config_file = f"{config_file_folder}/url_exact_match.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("https://github.com/web-arena-x") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = URLExactEvaluator() + score = evalutor( + trajectory, config_file, env.page + ) + print(env.page.url) + assert score == 0.0 + + +def test_html_content_match_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/html_content_exact_match.json" + + # randomly 
sample a string + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("https://russmaxdesign.github.io/exercise") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page + ) + assert score == 1.0 + + +def test_html_content_match_fail(script_browser_env: ScriptBrowserEnv) -> None: + config_file = f"{config_file_folder}/html_content_exact_match.json" + + # randomly sample a string + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = """page.goto("https://www.google.com/") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page + ) + assert score == 0.0 + + +def test_html_content_element_match_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/html_content_element_exact_match.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("https://russmaxdesign.github.io/exercise/") + page.get_by_label("Full name").fill("Hello World") + page.get_by_label("Email").click() + page.get_by_label("Email").fill("alexisxy@hotmail.com") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page + ) + assert score == 1.0 + + +def test_html_content_element_match_fail( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/html_content_element_exact_match.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + 
action_seq = f"""page.goto("https://russmaxdesign.github.io/exercise/") + page.get_by_label("Full name").fill("Hello") + page.get_by_label("Email").click() + page.get_by_label("Email").fill("alexisxy@hotmail.com") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page + ) + assert score == 0.0 + + +def test_html_content_url_comb_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/html_content_url_comb.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("https://russmaxdesign.github.io/exercise/") + page.get_by_label("Full name").fill("Hello World") + page.get_by_label("Email").click() + page.get_by_label("Email").fill("alexisxy@hotmail.com") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evaluators = EvaluatorComb( + [URLExactEvaluator(), HTMLContentExactEvaluator()] + ) + score = evaluators( + trajectory, config_file, env.page + ) + assert score == 1.0 + + +@pytest.mark.skipif( + IN_GITHUB_ACTIONS, reason="Won't work using the demo sites" +) +def test_func_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/func_eval_success.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page + ) + assert score == 1.0 + + +@pytest.mark.skipif( + IN_GITHUB_ACTIONS, reason="Won't work using the demo sites" +) +def test_func_fail( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file 
= f"{config_file_folder}/func_eval_fail.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page + ) + assert score == 0.0 + + +def test_func_url_func_last_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/func_url_func_1.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("{REDDIT}/f/wallstreetbets/50431/-/comment/676875") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page + ) + assert score == 1.0 + + +def test_html_required_values_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + for config_file in glob( + f"{config_file_folder}/html_required_values_success_*.json" + ): + # change the URL placeholder with the concrete URL + with open(config_file, "r") as f: + configs = json.load(f) + configs["eval"]["reference_url"] = configs["eval"][ + "reference_url" + ].replace("__SHOPPING__", SHOPPING) + tmp_config = config_file.replace(".json", ".json.tmp") + with open(tmp_config, "w+") as f: + json.dump(configs, f, indent=4) + + # randomly sample a string + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + gt_url = configs["eval"]["reference_url"] + action_seq = f"""page.goto("{gt_url}") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, tmp_config) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, tmp_config, env.page + ) + os.remove(tmp_config) + assert score == 1.0 + + +def 
test_page_image_evaluator( + script_browser_env: ScriptBrowserEnv, +) -> None: + for config_file in [ + f"{config_file_folder}/image_evaluator_yes.json", + f"{config_file_folder}/image_evaluator_yes_direct_img.json", + ]: + # change the URL placeholder with the concrete URL + with open(config_file, "r") as f: + configs = json.load(f) + configs["eval"]["reference_url"] = configs["eval"][ + "reference_url" + ].replace("__SHOPPING__", SHOPPING) + configs["start_url"] = configs["start_url"].replace( + "__SHOPPING__", SHOPPING + ) + for e in configs["eval"]["page_image_query"]: + e["eval_image_url"] = e["eval_image_url"].replace("__SHOPPING__", SHOPPING) + tmp_config = config_file.replace(".json", ".json.tmp") + with open(tmp_config, "w+") as f: + json.dump(configs, f, indent=4) + + # randomly sample a string + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + gt_url = configs["eval"]["page_image_query"][0]["eval_image_url"] + action_seq = f"""page.goto("{gt_url}") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, tmp_config) + + # Create a dummy captioning function that always returns "yes" + captioning_fn = lambda images, *args, **kwargs: ["yes"] * len(images) + evalutor = PageImageEvaluator(captioning_fn) + score = evalutor( + trajectory, tmp_config, env.page + ) + + # Create a dummy captioning function that always returns "yes" + captioning_fn = lambda images, *args, **kwargs: ["no"] * len(images) + evalutor = PageImageEvaluator(captioning_fn) + score_no = evalutor( + trajectory, tmp_config, env.page + ) + os.remove(tmp_config) + assert score == 1.0 + assert score_no == 0.0 + + +def test_page_image_evaluator_yes_no( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/image_evaluator_yes_no.json" + # change the URL placeholder with the concrete URL + with open(config_file, "r") as f: + configs = json.load(f) + 
configs["eval"]["reference_url"] = configs["eval"][ + "reference_url" + ].replace("__SHOPPING__", SHOPPING) + configs["start_url"] = configs["start_url"].replace( + "__SHOPPING__", SHOPPING + ) + for e in configs["eval"]["page_image_query"]: + e["eval_image_url"] = e["eval_image_url"].replace("__SHOPPING__", SHOPPING) + tmp_config = config_file.replace(".json", ".json.tmp") + with open(tmp_config, "w+") as f: + json.dump(configs, f, indent=4) + + # randomly sample a string + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + gt_url = configs["eval"]["page_image_query"][0]["eval_image_url"] + action_seq = f"""page.goto("{gt_url}") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, tmp_config) + + # Create a dummy captioning function that always returns "yes" + captioning_fn = lambda images, *args, **kwargs: ["yes"] * len(images) + evalutor = PageImageEvaluator(captioning_fn) + score = evalutor( + trajectory, tmp_config, env.page + ) + assert score == 0.0 + + +def test_html_required_values_failure( + script_browser_env: ScriptBrowserEnv, +) -> None: + for config_file in glob( + f"{config_file_folder}/html_required_values_failure_*.json" + ): + # change the URL placeholder with the concrete URL + with open(config_file, "r") as f: + configs = json.load(f) + configs["eval"]["reference_url"] = configs["eval"][ + "reference_url" + ].replace("__SHOPPING__", SHOPPING) + tmp_config = config_file.replace(".json", ".json.tmp") + with open(tmp_config, "w+") as f: + json.dump(configs, f, indent=4) + + # randomly sample a string + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + gt_url = configs["eval"]["reference_url"] + action_seq = f"""page.goto("{gt_url}") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, tmp_config) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + 
trajectory, tmp_config, env.page + ) + os.remove(tmp_config) + assert score == 0.0 + + +def test_exact_image( + script_browser_env: ScriptBrowserEnv, +) -> None: + for config_file, expected_score in zip( + [ + f"{config_file_folder}/exact_image_success.json", + f"{config_file_folder}/exact_image_failure.json", + f"{config_file_folder}/exact_image_resize_failure_1.json", + f"{config_file_folder}/exact_image_resize_failure_2.json", + ], + [1.0, 0.0, 0.0, 0.0], + ): + # change the URL placeholder with the concrete URL + with open(config_file, "r") as f: + configs = json.load(f) + configs["start_url"] = configs["start_url"].replace( + "__REDDIT__", REDDIT + ) + configs["eval"]["reference_url"] = configs["eval"][ + "reference_url" + ].replace("__REDDIT__", REDDIT) + + for e in configs["eval"]["page_image_query"]: + e["eval_fuzzy_image_match"] = e["eval_fuzzy_image_match"].replace("__REDDIT__", REDDIT) + configs["image"] = configs["image"].replace("__REDDIT__", REDDIT) + configs["intent"] = configs["intent"].replace("__REDDIT__", REDDIT) + tmp_config = config_file.replace(".json", ".tmp.json") + with open(tmp_config, "w+") as f: + json.dump(configs, f, indent=4) + + gt_url = configs["eval"]["reference_url"] + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("{gt_url}") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + trajectory = tf_roll_out(agent, env, tmp_config) + + captioning_fn = lambda images, *args, **kwargs: ["yes"] * len( + images + ) # Unused for this task + evalutor = PageImageEvaluator(captioning_fn) + score = evalutor( + trajectory, tmp_config, env.page + ) + assert score == expected_score, config_file + os.remove(tmp_config) + + +def test_exact_image_resize_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/exact_image_resize_template.json" + resized_img_path = "resized_img.tmp.png" + for downscale, expected_score in zip( + 
[2, 8], [1.0, 0.0] + ): # 2x (should pass) and 8x (should fail) downscale + # change the URL placeholder with the concrete URL + with open(config_file, "r") as f: + configs = json.load(f) + configs["start_url"] = configs["start_url"].replace( + "__REDDIT__", REDDIT + ) + configs["eval"]["reference_url"] = configs["eval"][ + "reference_url" + ].replace("__REDDIT__", REDDIT) + for e in configs["eval"]["page_image_query"]: + e["eval_fuzzy_image_match"] = e["eval_fuzzy_image_match"].replace("__REDDIT__", REDDIT) + configs["image"] = configs["image"].replace("__REDDIT__", REDDIT) + configs["intent"] = configs["intent"].replace("__REDDIT__", REDDIT) + + # Download an image and resize + img_url = configs["instantiation_dict"]["image_url"].replace( + "__REDDIT__", REDDIT + ) + img = Image.open(requests.get(img_url, stream=True).raw) + # Resize image to half its size + resized_img = img.resize( + (img.width // downscale, img.height // downscale) + ) + with open(resized_img_path, "wb") as wf: + resized_img.save(wf, format="png") + + configs["eval"]["page_image_query"][0]["eval_fuzzy_image_match"] = resized_img_path + + tmp_config = config_file.replace(".json", ".tmp.json") + with open(tmp_config, "w+") as f: + json.dump(configs, f, indent=4) + + gt_url = configs["eval"]["reference_url"] + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("{gt_url}") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + trajectory = tf_roll_out(agent, env, tmp_config) + + captioning_fn = lambda images, *args, **kwargs: ["yes"] * len( + images + ) # Unused for this task + evalutor = PageImageEvaluator(captioning_fn) + score = evalutor( + trajectory, tmp_config, env.page + ) + assert score == expected_score, config_file + os.remove(tmp_config) + os.remove(resized_img_path) \ No newline at end of file diff --git a/VAB-WebArena-Lite/tests/test_evaluation_harness/test_helper_functions.py 
b/VAB-WebArena-Lite/tests/test_evaluation_harness/test_helper_functions.py new file mode 100644 index 0000000..a176695 --- /dev/null +++ b/VAB-WebArena-Lite/tests/test_evaluation_harness/test_helper_functions.py @@ -0,0 +1,276 @@ +import json +import os + +from browser_env import ScriptBrowserEnv +from browser_env.env_config import * +from evaluation_harness.helper_functions import ( + get_query_text, + get_query_text_lowercase, + reddit_get_latest_comment_content_by_username, + reddit_get_parent_comment_username_of_latest_comment_by_username, + shopping_get_num_reviews, + shopping_get_order_product_option, + shopping_get_order_product_quantity, + shopping_get_product_attributes, + shopping_get_product_price, + shopping_get_rating_as_percentage, + shopping_get_sku_latest_review_rating, + shopping_get_sku_latest_review_text, + shopping_get_sku_latest_review_title, + shopping_get_sku_product_page_url, +) + +HEADLESS = True +config_file_folder = "tests/test_evaluation_harness/configs" + + +def test_shopping_get_attributes( + script_browser_env: ScriptBrowserEnv, +) -> None: + env = script_browser_env + config_file = f"{config_file_folder}/config_shopping_attr.json.tmp" + + with open(config_file, "w") as f: + json.dump({"storage_state": ".auth/shopping_state.json"}, f) + env.reset(options={"config_file": config_file}) + env.page.goto(f"{SHOPPING}/nec-np4100-6200-lumen-xga-dlp-projector.html") + manufacturer = shopping_get_product_attributes( + env.page, "manufacturer |OR| brand name" + ) + + env.reset(options={"config_file": config_file}) + env.page.goto( + f"{SHOPPING}/lg-50nano80upa-50-nanocell-4k-nano80-series-smart-ultra-hd-tv-with-an-lg-sn6y-3-1-channel-dts-virtual-high-resolution-soundbar-and-subwoofer-2021.html" + ) + brand_name = shopping_get_product_attributes( + env.page, "manufacturer |OR| brand name" + ) + + # remove tmp config file + os.remove(config_file) + assert "NEC Displays" in manufacturer + assert "LG" in brand_name + + +def test_get_query_text( + 
script_browser_env: ScriptBrowserEnv, +) -> None: + env = script_browser_env + config_file = f"{config_file_folder}/config_shopping_query.json.tmp" + + with open(config_file, "w") as f: + json.dump({"storage_state": ".auth/shopping_state.json"}, f) + env.reset(options={"config_file": config_file}) + env.page.goto( + f"{SHOPPING}/la-guapa-virtual-projection-keyboard-laser-projection-bluetooth-wireless-keyboard-for-smart-phone-pc-tablet-laptop-wireless-laser-projection-keyboard-silver.html" + ) + query_text = get_query_text( + env.page, "#maincontent > div.page-title-wrapper.product > h1 > span" + ) + assert "Projection Keyboard" in query_text + + query_text_lower = get_query_text_lowercase( + env.page, "#maincontent > div.page-title-wrapper.product > h1 > span" + ) + assert "projection keyboard" in query_text_lower + + # remove tmp config file + os.remove(config_file) + + +def test_get_product_price( + script_browser_env: ScriptBrowserEnv, +) -> None: + env = script_browser_env + config_file = f"{config_file_folder}/config_shopping_price.json.tmp" + + with open(config_file, "w") as f: + json.dump({"storage_state": ".auth/shopping_state.json"}, f) + env.reset(options={"config_file": config_file}) + env.page.goto( + f"{SHOPPING}/la-guapa-virtual-projection-keyboard-laser-projection-bluetooth-wireless-keyboard-for-smart-phone-pc-tablet-laptop-wireless-laser-projection-keyboard-silver.html" + ) + product_price = shopping_get_product_price(env.page) + assert product_price == 26.99 + + env.page.goto(f"{SHOPPING}") + product_price = shopping_get_product_price(env.page) + assert product_price == 0 + + # remove tmp config file + os.remove(config_file) + + +def test_get_num_reviews( + script_browser_env: ScriptBrowserEnv, +) -> None: + env = script_browser_env + config_file = f"{config_file_folder}/config_shopping_num_reviews.json.tmp" + + with open(config_file, "w") as f: + json.dump({"storage_state": ".auth/shopping_state.json"}, f) + env.reset(options={"config_file": 
config_file}) + env.page.goto( + f"{SHOPPING}/la-guapa-virtual-projection-keyboard-laser-projection-bluetooth-wireless-keyboard-for-smart-phone-pc-tablet-laptop-wireless-laser-projection-keyboard-silver.html" + ) + product_reviews = shopping_get_num_reviews(env.page) + assert product_reviews == 12 + # remove tmp config file + os.remove(config_file) + + +def test_get_product_rating( + script_browser_env: ScriptBrowserEnv, +) -> None: + env = script_browser_env + config_file = ( + f"{config_file_folder}/config_shopping_rating_percentage.json.tmp" + ) + + with open(config_file, "w") as f: + json.dump({"storage_state": ".auth/shopping_state.json"}, f) + env.reset(options={"config_file": config_file}) + env.page.goto( + f"{SHOPPING}/v8-energy-healthy-energy-drink-steady-energy-from-black-and-green-tea-pomegranate-blueberry-8-ounce-can-pack-of-24.html" + ) + product_rating = shopping_get_rating_as_percentage(env.page) + assert product_rating == 57 + + env.page.goto(f"{SHOPPING}/catalogsearch/advanced/") + product_rating = shopping_get_rating_as_percentage(env.page) + assert product_rating == 0 + # remove tmp config file + os.remove(config_file) + + +def test_shopping_get_sku_product_page_url( + script_browser_env: ScriptBrowserEnv, +) -> None: + true_url = f"{SHOPPING}/xbox-wireless-controller-phantom-white-special-edition.html" + url = shopping_get_sku_product_page_url("B07P3L5GMW") + + assert url == true_url + + +# NOTE: These fail if the B07N4Q7P67 reviews are modified and are hence just useful as a sanity check. 
+def test_shopping_get_sku_latest_review_text( + script_browser_env: ScriptBrowserEnv, +) -> None: + true_text = "Good quality" + text = shopping_get_sku_latest_review_text("B07N4Q7P67") + + assert text == true_text, f"Expected: {true_text}\nGot: {text}" + + +def test_shopping_get_sku_latest_review_title( + script_browser_env: ScriptBrowserEnv, +) -> None: + true_title = "Fits Nintendo switch" + title = shopping_get_sku_latest_review_title("B07N4Q7P67") + + assert title == true_title, f"Expected: {true_title}\nGot: {title}" + + +def test_shopping_get_sku_latest_review_text( + script_browser_env: ScriptBrowserEnv, +) -> None: + true_text = "Good quality" + text = shopping_get_sku_latest_review_text("B07N4Q7P67") + + assert text == true_text, f"Expected: {true_text}\nGot: {text}" + + +def test_shopping_get_sku_latest_review_rating( + script_browser_env: ScriptBrowserEnv, +) -> None: + true_rating = "100" + rating = shopping_get_sku_latest_review_rating("B07N4Q7P67") + + assert rating == true_rating, f"Expected: {true_rating}\nGot: {rating}" + + +def test_shopping_get_order_product_quantity( + script_browser_env: ScriptBrowserEnv, +) -> None: + env = script_browser_env + config_file = f"{config_file_folder}/config_test_shopping_get_order_product_quantity.json.tmp" + + with open(config_file, "w") as f: + json.dump({"storage_state": ".auth/shopping_state.json"}, f) + env.reset(options={"config_file": config_file}) + env.page.goto(f"{SHOPPING}/sales/order/view/order_id/170/") + + quantity = shopping_get_order_product_quantity(env.page, "B087QSCXGT") + assert quantity == 1 + + quantity = shopping_get_order_product_quantity(env.page, "B08JLHHCM6") + assert quantity == 1 + + quantity = shopping_get_order_product_quantity(env.page, "B09LQTV3RX") + assert quantity == 1 + + # remove tmp config file + os.remove(config_file) + + +def test_shopping_get_order_product_option( + script_browser_env: ScriptBrowserEnv, +) -> None: + env = script_browser_env + config_file = 
f"{config_file_folder}/config_test_shopping_get_order_product_option.json.tmp" + + with open(config_file, "w") as f: + json.dump({"storage_state": ".auth/shopping_state.json"}, f) + env.reset(options={"config_file": config_file}) + env.page.goto(f"{SHOPPING}/sales/order/view/order_id/170/") + + option = shopping_get_order_product_option(env.page, "B09LQTV3RX", "Color") + assert option == "Blue" + + option = shopping_get_order_product_option(env.page, "B09LQTV3RX", "Size") + assert option == "Large" + + # remove tmp config file + os.remove(config_file) + + +def test_reddit_get_latest_comment_content_by_username( + script_browser_env: ScriptBrowserEnv, +) -> None: + env = script_browser_env + config_file = f"{config_file_folder}/config_test_reddit_get_post_comment_tree.json.tmp" + + with open(config_file, "w") as f: + json.dump({"storage_state": ".auth/reddit_state.json"}, f) + env.reset(options={"config_file": config_file}) + env.page.goto(f"{REDDIT}/f/AskReddit/116809") + + comment_content = reddit_get_latest_comment_content_by_username( + env.page, "DavosLostFingers" + ) + assert comment_content == "Constantly on their phone" + + # remove tmp config file + os.remove(config_file) + + +def test_reddit_get_parent_comment_username_of_latest_comment_by_username( + script_browser_env: ScriptBrowserEnv, +) -> None: + env = script_browser_env + config_file = f"{config_file_folder}/config_test_reddit_get_parent_comment_tree.json.tmp" + + with open(config_file, "w") as f: + json.dump({"storage_state": ".auth/reddit_state.json"}, f) + env.reset(options={"config_file": config_file}) + env.page.goto(f"{REDDIT}/f/memes/127590") + + comment_content = ( + reddit_get_parent_comment_username_of_latest_comment_by_username( + env.page, "Veryhawtwoman" + ) + ) + assert comment_content == "Da_Bro_Main" + + # remove tmp config file + os.remove(config_file) diff --git a/VAB-WebArena-Lite/new/wa_parallel_run.sh b/VAB-WebArena-Lite/wa_parallel_run.sh similarity index 100% rename from 
VAB-WebArena-Lite/new/wa_parallel_run.sh rename to VAB-WebArena-Lite/wa_parallel_run.sh diff --git a/VAB-WebArena-Lite/new/wa_parallel_run_webrl.sh b/VAB-WebArena-Lite/wa_parallel_run_webrl.sh similarity index 100% rename from VAB-WebArena-Lite/new/wa_parallel_run_webrl.sh rename to VAB-WebArena-Lite/wa_parallel_run_webrl.sh diff --git a/VAB-WebArena-Lite/new/wa_parallel_run_webrl_chat.sh b/VAB-WebArena-Lite/wa_parallel_run_webrl_chat.sh similarity index 66% rename from VAB-WebArena-Lite/new/wa_parallel_run_webrl_chat.sh rename to VAB-WebArena-Lite/wa_parallel_run_webrl_chat.sh index f018513..37d81c0 100644 --- a/VAB-WebArena-Lite/new/wa_parallel_run_webrl_chat.sh +++ b/VAB-WebArena-Lite/wa_parallel_run_webrl_chat.sh @@ -1,26 +1,33 @@ #!/bin/bash DATASET='webarena' # TODO: select from ['webarena', 'visualwebarena'] -result_dir='' # TODO: set your result_dir +result_dir='./results/webrl_chat' # TODO: set your result_dir provider='openai' # TODO: select from ['openai', 'finetune', ...] -model='' # TODO: assign model name, which is used for action generation -planner_ip='' # TODO: ip address of the model you are deploying (only if you are deploying your own model using e.g. vllm) +model='webrl-llama-3.1-8b' # TODO: assign model name, which is used for action generation +planner_ip='192.168.16.116' # TODO: ip address of the model you are deploying (only if you are deploying your own model using e.g. 
vllm) instruction_path='agent/prompts/jsons/p_webrl_chat.json' # e.g., agent/prompts/jsons/p_cot_id_actree_2s.json test_config_base_dir='config_files/wa/test_webarena_lite' # e.g., config_files/wa/test_webarena_lite temperature=0.0 +proxy_url='socks5://98.152.200.61:8081' -SERVER='' # TODO: your server address -MAP_SERVER='' # TODO: the server address for MAP tasks -OPENAI_API_KEY='' # TODO: if you test OpenAI APIs +SERVER='localhost' # TODO: your server address +MAP_SERVER='https://www.openstreetmap.org' # TODO: the server address for MAP tasks +OPENAI_API_KEY='none' # TODO: if you test OpenAI APIs +OPENAI_API_URL='http://192.168.16.116:18080/v1' # TODO: if you test OpenAI APIs +OPENAI_API_KEY_FUZZY="sk-uG8JLGz3wlXTAmiC8337A02f5d2946F2Ba3dE64427B90c2f" +OPENAI_API_URL_FUZZY="https://aiproxy.lmzgc.cn:8080/v1/" OPENAI_ORGANIZATION='' -CONDA_ENV_NAME='' # TODO: the name of your conda environment for testing WebArena +CONDA_ENV_NAME='vab' # TODO: the name of your conda environment for testing WebArena -ENV_VARIABLES="export DATASET=${DATASET}; export SHOPPING='http://${SERVER}:7770';export SHOPPING_ADMIN='http://${SERVER}:7780/admin';export REDDIT='http://${SERVER}:9999';export GITLAB='http://${SERVER}:8023';export MAP='http://${MAP_SERVER}:3000';export WIKIPEDIA='http://${SERVER}:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing';export HOMEPAGE='http://${SERVER}:4399';export OPENAI_API_KEY=${OPENAI_API_KEY};export OPENAI_ORGANIZATION=${OPENAI_ORGANIZATION}" + +ENV_VARIABLES="export DATASET=${DATASET}; export SHOPPING='http://${SERVER}:28082';export SHOPPING_ADMIN='http://${SERVER}:28083/admin';export REDDIT='http://${SERVER}:28080';export GITLAB='http://${SERVER}:28084';export MAP='${MAP_SERVER}';export WIKIPEDIA='http://${SERVER}:28081/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing';export HOMEPAGE='http://${SERVER}:20080';export OPENAI_API_KEY=${OPENAI_API_KEY};export OPENAI_API_URL=${OPENAI_API_URL};export 
OPENAI_ORGANIZATION=${OPENAI_ORGANIZATION};export OPENAI_API_KEY_FUZZY=${OPENAI_API_KEY_FUZZY};export OPENAI_API_URL_FUZZY=${OPENAI_API_URL_FUZZY}" +echo $ENV_VARIABLES # get the number of tmux panes num_panes=$(tmux list-panes | wc -l) # calculate how many panes need to be created let "panes_to_create = 7 - num_panes" +# let "panes_to_create = 1 - num_panes" # array of tmux commands to create each pane tmux_commands=( @@ -60,6 +67,7 @@ run_job() { --viewport_height 720 \ --parsing_failure_th 5 \ --repeating_action_failure_th 5 \ + --proxy_url ${proxy_url} \ --action_set_tag webrl_id --observation_type webrl" tmux send-keys "tmux set mouse on; conda activate ${CONDA_ENV_NAME}; ${ENV_VARIABLES}; until ${COMMAND}; do echo 'crashed' >&2; sleep 1; done" C-m sleep 3 diff --git a/VAB-WebArena-Lite/webarena.egg-info/PKG-INFO b/VAB-WebArena-Lite/webarena.egg-info/PKG-INFO new file mode 100644 index 0000000..8788227 --- /dev/null +++ b/VAB-WebArena-Lite/webarena.egg-info/PKG-INFO @@ -0,0 +1,12 @@ +Metadata-Version: 2.2 +Name: webarena +Version: 0.0.0 +Requires-Python: <4,>=3.7 +License-File: LICENSE +Provides-Extra: dev +Requires-Dist: pre-commit==3.0.1; extra == "dev" +Requires-Dist: pytest==7.1.2; extra == "dev" +Requires-Dist: mypy==0.991; extra == "dev" +Requires-Dist: nbmake; extra == "dev" +Requires-Dist: pytest-asyncio; extra == "dev" +Requires-Dist: types-requests; extra == "dev" diff --git a/VAB-WebArena-Lite/webarena.egg-info/SOURCES.txt b/VAB-WebArena-Lite/webarena.egg-info/SOURCES.txt new file mode 100644 index 0000000..a95a425 --- /dev/null +++ b/VAB-WebArena-Lite/webarena.egg-info/SOURCES.txt @@ -0,0 +1,31 @@ +LICENSE +README.md +setup.cfg +setup.py +agent/__init__.py +agent/agent.py +browser_env/__init__.py +browser_env/actions.py +browser_env/async_envs.py +browser_env/auto_login.py +browser_env/constants.py +browser_env/env_config.py +browser_env/envs.py +browser_env/helper_functions.py +browser_env/processors.py +browser_env/py.typed 
+browser_env/trajectory.py +browser_env/utils.py +evaluation_harness/__init__.py +evaluation_harness/evaluators.py +evaluation_harness/helper_functions.py +evaluation_harness/image_utils.py +llms/__init__.py +llms/lm_config.py +llms/tokenizers.py +llms/utils.py +webarena.egg-info/PKG-INFO +webarena.egg-info/SOURCES.txt +webarena.egg-info/dependency_links.txt +webarena.egg-info/requires.txt +webarena.egg-info/top_level.txt \ No newline at end of file diff --git a/VAB-WebArena-Lite/webarena.egg-info/dependency_links.txt b/VAB-WebArena-Lite/webarena.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/VAB-WebArena-Lite/webarena.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/VAB-WebArena-Lite/webarena.egg-info/requires.txt b/VAB-WebArena-Lite/webarena.egg-info/requires.txt new file mode 100644 index 0000000..5fe0668 --- /dev/null +++ b/VAB-WebArena-Lite/webarena.egg-info/requires.txt @@ -0,0 +1,8 @@ + +[dev] +pre-commit==3.0.1 +pytest==7.1.2 +mypy==0.991 +nbmake +pytest-asyncio +types-requests diff --git a/VAB-WebArena-Lite/webarena.egg-info/top_level.txt b/VAB-WebArena-Lite/webarena.egg-info/top_level.txt new file mode 100644 index 0000000..1a621bd --- /dev/null +++ b/VAB-WebArena-Lite/webarena.egg-info/top_level.txt @@ -0,0 +1,4 @@ +agent +browser_env +evaluation_harness +llms diff --git a/log_files/log_20250423143103_11.log b/log_files/log_20250423143103_11.log new file mode 100644 index 0000000..9930a07 --- /dev/null +++ b/log_files/log_20250423143103_11.log @@ -0,0 +1,2 @@ +2025-04-23 14:31:03,109 - INFO - Create result dir: ./results/webrl_chat +2025-04-23 14:31:03,111 - INFO - Dump config to results/webrl_chat/config.json