webrl/VAB-WebArena-Lite/config_files/vwa/test_reddit.raw.json
2025-04-23 17:01:18 +08:00

8301 lines
331 KiB
JSON

[
{
"sites": [
"reddit"
],
"task_id": 0,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/food",
"geolocation": null,
"intent_template": "I'm trying to find this post. Navigate to the comment section for it.",
"intent": "I'm trying to find this post. Navigate to the comment section for it.",
"image": "__REDDIT__/submission_images/a731d0dd8da7f9db30c5b1aa2caddc79237f23b63d01181fa394458536d30cd4.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/food/82896",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "easy",
"comments": "post is near the top, can be done without exact image understanding",
"intent_template_id": 0
},
{
"sites": [
"reddit"
],
"task_id": 1,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/food",
"geolocation": null,
"intent_template": "I'm trying to find this post. Navigate to the comment section for it.",
"intent": "I'm trying to find this post. Navigate to the comment section for it.",
"image": "__REDDIT__/submission_images/e012f0528f58c57240a4530b9c04f07f9828053c5202d97306d528048fc002c0.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/food/60721",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "need to navigate to the second page to find this",
"intent_template_id": 0
},
{
"sites": [
"reddit"
],
"task_id": 2,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/movies",
"geolocation": null,
"intent_template": "I'm trying to find this post. Navigate to the comment section for it.",
"intent": "I'm trying to find this post. Navigate to the comment section for it.",
"image": "__REDDIT__/submission_images/91ba8efd310cfbb3dd72dcd58183713fe00378019b90ee05555fe62cbae8d46c.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/movies/107714",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "medium: need to navigate to the bottom to find this. /f/movies is not all images, but a mix of posts and images",
"intent_template_id": 0
},
{
"sites": [
"reddit"
],
"task_id": 3,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/food",
"geolocation": null,
"intent_template": "I'm trying to do some research about the post with this image. Tell me how many comments mention the word \"{{word}}\".",
"intent": "I'm trying to do some research about the post with this image. Tell me how many comments mention the word \"spicy\".",
"image": "__REDDIT__/submission_images/24793bc78359a112286f5a18e72b3eb4bf74f1ecc1518ebdce6b201c728b1b44.jpg",
"instantiation_dict": {
"word": "spicy"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"11"
]
},
"reference_url": "",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "11"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "product image provided, and need to find the post and count all comments",
"intent_template_id": 1
},
{
"sites": [
"reddit"
],
"task_id": 4,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "I'm trying to do some research about the post with this image. Tell me how many comments mention the word \"{{word}}\".",
"intent": "I'm trying to do some research about the post with this image. Tell me how many comments mention the word \"wheat\".",
"image": "__REDDIT__/submission_images/303ce4a44cd98a77a1d3ed99511e21fcc9973fc8aa5e0382f42f3ed3c598ac2a.jpg",
"instantiation_dict": {
"word": "wheat"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"1"
]
},
"reference_url": "__REDDIT__/f/OldSchoolCool/121626/in-1982-agnes-denes-cultivated-grew-and-harvested-a-two-acre",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "1"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "product image provided, need to navigate to the correct post in /f/OldSchoolCool and count all comments",
"intent_template_id": 1
},
{
"sites": [
"reddit"
],
"task_id": 5,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "I'm trying to do some research about the post with this image. Tell me how many comments this post has.",
"intent": "I'm trying to do some research about the post with this image. Tell me how many comments this post has.",
"image": "__REDDIT__/submission_images/1dca2e0d195036fd9d5bb3416cef4cc061796b48dc8b7a8b4d4d14229340a45e.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"121"
]
},
"reference_url": "",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "121"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "hard (even harder for humans): product image provided, and need to identify that the post is in /f/MechanicalKeyboards, and find the correct one",
"intent_template_id": 2
},
{
"sites": [
"reddit"
],
"task_id": 6,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "I'm trying to do some research about the post with this image. Tell me how many comments this post has.",
"intent": "I'm trying to do some research about the post with this image. Tell me how many comments this post has.",
"image": "__REDDIT__/submission_images/ac7dbdd1f4053d2603a4e72e7eac53a35d1cf80c665889fbf4c2affe4415c996.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"28"
]
},
"reference_url": "__REDDIT__/f/food/82940/homemade-sweet-amp-spicy-sticky-chinese-pork-belly",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "28"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "hard (even harder for humans): product image provided, and need to identify that the post is in /f/food, and find the correct one. It's several pages in.",
"intent_template_id": 2
},
{
"sites": [
"reddit"
],
"task_id": 7,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "I'm doing some research about the post with this image. Navigate to the comment url of the recipe posted by the OP in the comments.",
"intent": "I'm doing some research about the post with this image. Navigate to the comment url of the recipe posted by the OP in the comments.",
"image": "__REDDIT__/submission_images/e1eaca118ba95fee4a25afbba5bda97e46f53b6abfffba484a79961e2058f9bd.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"fuzzy_match": "N/A"
},
"reference_url": "",
"program_html": [],
"string_note": "The OP did not provide a recipe",
"reference_answer_raw_annotation": "The OP did not provide a recipe"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "product image provided, and need to find the post and check if a recipe is provided (it is not)",
"intent_template_id": 3
},
{
"sites": [
"reddit"
],
"task_id": 8,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "I'm doing some research about the post with this image. Navigate to the comment url of the recipe posted by the OP in the comments.",
"intent": "I'm doing some research about the post with this image. Navigate to the comment url of the recipe posted by the OP in the comments.",
"image": "__REDDIT__/submission_images/24793bc78359a112286f5a18e72b3eb4bf74f1ecc1518ebdce6b201c728b1b44.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/food/104449/-/comment/1958174",
"program_html": [],
"url_note": "EXACT"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "product image provided, and need to find the post and check if a recipe is provided (it is)",
"intent_template_id": 3
},
{
"sites": [
"reddit"
],
"task_id": 9,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/food",
"geolocation": null,
"intent_template": "I'm doing some research about the post with this image. Navigate to the comment url of the recipe posted by the OP in the comments.",
"intent": "I'm doing some research about the post with this image. Navigate to the comment url of the recipe posted by the OP in the comments.",
"image": "__REDDIT__/submission_images/f85e45ccbb6417ca67872694cb4e9906fb5d52415517b5a82ccd4be43df2bce5.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/food/125449/-/comment/2293962 |OR| __REDDIT__/f/food/125449/-/comment/2305083",
"program_html": [],
"url_note": "EXACT"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "product image provided, and need to find the post (starting from /f/food) and check if a recipe is provided",
"intent_template_id": 3
},
{
"sites": [
"reddit"
],
"task_id": 10,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/food",
"geolocation": null,
"intent_template": "I'm doing some research about the post with this image. Navigate to the comment url of the recipe posted by the OP in the comments.",
"intent": "I'm doing some research about the post with this image. Navigate to the comment url of the recipe posted by the OP in the comments.",
"image": "__REDDIT__/submission_images/95279e58f603d7bc121dd49b061440053ebe8c29496c36487cb9005ba7b37515.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/food/39576/-/comment/720148",
"program_html": [],
"url_note": "EXACT"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "product image provided, and need to find the post (need to paginate starting from /f/food) and check if a recipe is provided (it is not)",
"intent_template_id": 3
},
{
"sites": [
"reddit"
],
"task_id": 11,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Can you help me find and navigate to the comments section of the post with this image?",
"intent": "Can you help me find and navigate to the comments section of the post with this image?",
"image": "__REDDIT__/submission_images/534655c4f627b6bfc41f279290a8f0dec6616d24ed7baa2a7b7054326fdd4269.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/pittsburgh/45900",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "hard: product image provided, need to identify that it's of Pittsburgh, navigate to that subreddit, and click on the post",
"intent_template_id": 4
},
{
"sites": [
"reddit"
],
"task_id": 12,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Can you help me find and navigate to the comments section of the post with this image?",
"intent": "Can you help me find and navigate to the comments section of the post with this image?",
"image": "__REDDIT__/submission_images/756ea348e1bbd91001c7dc43f1c0f5f1bcd744691f12eaf535e08b680a5a8f37.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/pittsburgh/46057",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "hard: product image provided, need OCR to identify that it's of Pittsburgh, navigate to that subreddit, and click on the post",
"intent_template_id": 4
},
{
"sites": [
"reddit"
],
"task_id": 13,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Can you help me find and navigate to the comments section of the post with this image?",
"intent": "Can you help me find and navigate to the comments section of the post with this image?",
"image": "__REDDIT__/submission_images/dc079dc0eafe6bf8dbf1493fb9d59f6868c65ac5c3fafb71f4e6e8748d85b71b.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/Newark/35719",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "hard: product image provided, need to identify that it's of Newark, navigate to that subreddit, and click on the post",
"intent_template_id": 4
},
{
"sites": [
"reddit"
],
"task_id": 14,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Can you help me find and navigate to the comments section of an image post that has something like this image?",
"intent": "Can you help me find and navigate to the comments section of an image post that has something like this image?",
"image": "__REDDIT__/submission_images/4b2b1b0890ee42f1cc7447e925bcc50d37de165254e268c1e792ef8cb30ac338.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"page_image_query"
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__row",
"eval_vqa": [
{
"question": "Is this a photo taken at night? (yes/no)",
"answer": "yes"
},
{
"question": "Is this photo of a city skyline? (yes/no)",
"answer": "yes"
}
]
}
],
"reference_url": "",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": ""
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "medium: product image provided, need to find something similar out of all subreddits",
"intent_template_id": 5
},
{
"sites": [
"reddit"
],
"task_id": 15,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Can you help me find and navigate to the comments section of an image post that has something like this image?",
"intent": "Can you help me find and navigate to the comments section of an image post that has something like this image?",
"image": "coco_images/000000000285.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"page_image_query"
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__row",
"eval_vqa": [
{
"question": "Is this a photo of a bear? (yes/no)",
"answer": "yes"
}
]
}
],
"reference_url": "__REDDIT__/f/aww/17116",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": ""
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "medium: coco image provided, need to find something similar out of all subreddits",
"intent_template_id": 5
},
{
"sites": [
"reddit"
],
"task_id": 16,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "What {{question}} {{modifier}}?",
"intent": "What month and year (in mm-yyyy format) was this image posted in the /f/pics subreddit?",
"image": "__REDDIT__/submission_images/00d2341392f787ba15960934bea08a81090053c0ac17458c37c743a2eacb9f66.jpg",
"instantiation_dict": {
"question": "month and year (in mm-yyyy format)",
"modifier": "was this image posted in the /f/pics subreddit"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"10-2022"
]
},
"reference_url": "__REDDIT__/f/pics/25017/my-oil-painting-of-mcdonald-s-filet-o-fish",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "October 2022"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "hard: product image provided, and need to find the post and identify the date",
"intent_template_id": 6
},
{
"sites": [
"reddit"
],
"task_id": 17,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Navigate to a subreddit where I can find the most similar content to this image.",
"intent": "Navigate to a subreddit where I can find the most similar content to this image.",
"image": "__REDDIT__/submission_images/43a7a1ca5d08b16fcb11926a7bcc4b16476ed8e62a55bdd917b1020f2510f2e2.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/memes",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "medium: product image provided, and need to find the meme subreddit",
"intent_template_id": 7
},
{
"sites": [
"reddit"
],
"task_id": 18,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Navigate to a subreddit where I can find the most similar content to this image.",
"intent": "Navigate to a subreddit where I can find the most similar content to this image.",
"image": "__REDDIT__/submission_images/7a1e98e9fa4016d835815a04aa9375d82955931805163defaa185c8cf8217b47.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/MechanicalKeyboards",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "medium: product image provided, and need to find the mechanical keyboards subreddit",
"intent_template_id": 7
},
{
"sites": [
"reddit"
],
"task_id": 19,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Find and upvote this post. I think it was posted in {{forum}}.",
"intent": "Find and upvote this post. I think it was posted in /f/GetMotivated.",
"image": "__REDDIT__/submission_images/1c42177287fc4285f7ff6755c520e8484b35aaf63e764f2aaa93764195cdcde0.jpg",
"instantiation_dict": {
"forum": "/f/GetMotivated"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/GetMotivated/76904/image-you-are-tomorrow",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"must_include": [
"vote vote--user-upvoted"
]
}
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "hard: product image provided, need to find the correct post and upvote it",
"intent_template_id": 8
},
{
"sites": [
"reddit"
],
"task_id": 20,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Find and upvote this post. I think it was posted in {{forum}}.",
"intent": "Find and upvote this post. I think it was posted in /f/space.",
"image": "__REDDIT__/media/cache/resolve/submission_thumbnail_1x/a1920694edca7656fc929bab1166d2d667181e3aa495147cbe319dbadcd6cccb.jpg",
"instantiation_dict": {
"forum": "/f/space"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/space/91926/milkdromeda",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"must_include": [
"vote vote--user-upvoted"
]
}
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "hard: product image provided, need to find the correct post (second page in /f/space) and upvote it",
"intent_template_id": 8
},
{
"sites": [
"reddit"
],
"task_id": 21,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Find and upvote this post. I think it was posted in {{forum}}.",
"intent": "Find and upvote this post. I think it was posted in /f/OldSchoolCool.",
"image": "__REDDIT__/submission_images/97414d47ab9fe6c197aaa93f5157a80d59db4df0cbc9b683c7cb5aa9ad326e0e.jpg",
"instantiation_dict": {
"forum": "/f/OldSchoolCool"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/OldSchoolCool/56995/goerge-clooney-1990",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"must_include": [
"vote vote--user-upvoted"
]
}
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "hard: product image provided, need to find the correct post (third page in /f/OldSchoolCool) and upvote it",
"intent_template_id": 8
},
{
"sites": [
"reddit"
],
"task_id": 22,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Find this post and tell me what the top comment says.",
"intent": "Find this post and tell me what the top comment says.",
"image": "__REDDIT__/submission_images/38fa7f08e147044f478f01b6a5a01d0798abf168016a3b38d2cc84779cb210e1.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"Very rare. I swear last time i saw these on ebay they were like \u00a3700"
]
},
"reference_url": "__REDDIT__/f/consoles/103611",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "Very rare. I swear last time i saw these on ebay they were like \u00a3700"
},
"intent_template_id": 9,
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "hard: dark product image provided, need to find the correct post and identify the top comment. __REDDIT__/f/consoles/103611/-/comment/1742498"
},
{
"sites": [
"reddit"
],
"task_id": 23,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Find this post and tell me what the top comment says.",
"intent": "Find this post and tell me what the top comment says.",
"image": "__REDDIT__/submission_images/2319bc580d861d7543159a48d1bc6085b1e62bfddb0673b35c09f924fec3e9ba.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"https://imgur.com/a/P1YyboM"
]
},
"reference_url": "__REDDIT__/f/photoshopbattles/24816/-/comment/155650",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "https://imgur.com/a/P1YyboM"
},
"intent_template_id": 9,
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "hard: image provided, need to find the correct post and identify the top comment."
},
{
"sites": [
"reddit"
],
"task_id": 24,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Find this post and tell me what the top comment says.",
"intent": "Find this post and tell me what the top comment says.",
"image": "__REDDIT__/submission_images/c00998a8eba2c76e09451d6a01f93052f2aebd87808db67e451151aa22467578.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"Nah fam, that's a",
"psychonaut"
]
},
"reference_url": "__REDDIT__/f/pics/45632/-/comment/792645",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "Nah fam, that's a psychonaut"
},
"intent_template_id": 9,
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "hard: image provided, need to find the correct post and identify the top comment."
},
{
"sites": [
"reddit"
],
"task_id": 25,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Find this post and tell me what the top comment says.",
"intent": "Find this post and tell me what the top comment says.",
"image": "__REDDIT__/submission_images/bf9827860b0a877f292bbc9082ecadada26d3a8757982478c30c1a121de15bcd.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"Pompadour Cotinga I believe. On the top 10 list of birds I will never see in my garden."
]
},
"reference_url": "__REDDIT__/f/pics/45697/-/comment/794907",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "Pompadour Cotinga I believe. On the top 10 list of birds I will never see in my garden."
},
"intent_template_id": 9,
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "hard: image provided, need to find the correct post and identify the top comment. "
},
{
"sites": [
"reddit"
],
"task_id": 26,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Find this post and tell me what the top comment says.",
"intent": "Find this post and tell me what the top comment says.",
"image": "https://images.pexels.com/photos/9782305/pexels-photo-9782305.jpeg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"fuzzy_match": "N/A"
},
"reference_url": "",
"program_html": [],
"string_note": "This post does not exist on the site.",
"reference_answer_raw_annotation": "This post does not exist on the site."
},
"intent_template_id": 9,
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "hard: image provided, need to identify that there is no such post"
},
{
"sites": [
"reddit"
],
"task_id": 27,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Make a Reddit post with this image ({{image_url}}) in the most relevant subreddit, and navigate to its comments section.",
"intent": "Make a Reddit post with this image (https://images.pexels.com/photos/14267038/pexels-photo-14267038.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2) in the most relevant subreddit, and navigate to its comments section.",
"image": "https://images.pexels.com/photos/14267038/pexels-photo-14267038.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2",
"instantiation_dict": {
"image_url": "https://images.pexels.com/photos/14267038/pexels-photo-14267038.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match",
"page_image_query"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/MechanicalKeyboards",
"url_note": "GOLD in PRED",
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__content",
"eval_fuzzy_image_match": "https://images.pexels.com/photos/14267038/pexels-photo-14267038.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2"
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "hard: existing models probably cannot handle this task because of screen browsing/navigation capabilities",
"intent_template_id": 10
},
{
"sites": [
"reddit"
],
"task_id": 28,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Write a story about this picture and post it in the most relevant subreddit.",
"intent": "Write a story about this picture and post it in the most relevant subreddit.",
"image": "coco_images/000000515982.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html",
"page_image_query"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/pics",
"program_html": [
{
"url": "last",
"locator": "func:get_query_text(__page__, '.submission__inner')",
"required_contents": {
"must_include": [
"baseball"
]
}
},
{
"url": "last",
"locator": "func:get_query_text(__page__, '.submission__submitter')",
"required_contents": {
"must_include": [
"MarvelsGrantMan136"
]
}
},
{
"url": "last",
"locator": "func:get_query_text(__page__, '.submission__info')",
"required_contents": {
"must_include": [
"pics |OR| sports"
]
}
}
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_fuzzy_image_match": "coco_images/000000515982.jpg"
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "",
"intent_template_id": 11
},
{
"sites": [
"reddit"
],
"task_id": 29,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Write a story about this picture and post it in the most relevant subreddit.",
"intent": "Write a story about this picture and post it in the most relevant subreddit.",
"image": "https://images.pexels.com/photos/1170986/pexels-photo-1170986.jpeg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html",
"page_image_query"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/pics",
"program_html": [
{
"url": "last",
"locator": "func:get_query_text(__page__, '.submission__inner')",
"required_contents": {
"must_include": [
"cat"
]
}
},
{
"url": "last",
"locator": "func:get_query_text(__page__, '.submission__submitter')",
"required_contents": {
"must_include": [
"MarvelsGrantMan136"
]
}
},
{
"url": "last",
"locator": "func:get_query_text(__page__, '.submission__info')",
"required_contents": {
"must_include": [
"pics |OR| aww"
]
}
}
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_fuzzy_image_match": "https://images.pexels.com/photos/1170986/pexels-photo-1170986.jpeg"
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "",
"intent_template_id": 11
},
{
"sites": [
"reddit"
],
"task_id": 30,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Find the user who posted this and navigate to the comments section of one of their other posts.",
"intent": "Find the user who posted this and navigate to the comments section of one of their other posts.",
"image": "__REDDIT__/submission_images/4f93a8def3912dd323c901b33d9ee862138f15152632d91f6f2fff0d2ff0a9e6.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/gaming/105429",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "hard: need to navigate to the appropriate subreddit (based off gaming content), identify the correct user, and navigate to their other posts. Original URL: __REDDIT__/f/gaming/126114/oc-loot",
"intent_template_id": 12
},
{
"sites": [
"reddit"
],
"task_id": 31,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Find the user who posted this and navigate to the comments section of one of their other posts.",
"intent": "Find the user who posted this and navigate to the comments section of one of their other posts.",
"image": "__REDDIT__/submission_images/d3333ad4839961f9f765b514279fc0730c9c7fee194ecd3fd069e3f15ef86234.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"fuzzy_match": "N/A"
},
"reference_url": "",
"program_html": [],
"string_note": "This user does not have other posts.",
"reference_answer_raw_annotation": "This user does not have other posts."
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "hard: need to navigate to the appropriate subreddit, identify the correct user, and find out that they don't have other posts (__REDDIT__/f/pics/89308/the-tropical-rainbow-lobster-panulirus-ornatus)",
"intent_template_id": 12
},
{
"sites": [
"reddit"
],
"task_id": 32,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Find the user who posted this and navigate to the comments section of one of their other posts.",
"intent": "Find the user who posted this and navigate to the comments section of one of their other posts.",
"image": "__REDDIT__/submission_images/bd60b4847804344faecd02aaaa6cb280024a63cd3610283ab2de3406f3f75cf8.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/nyc/44500 |OR| __REDDIT__/f/nyc/44527 |OR| __REDDIT__/f/pics/89551 |OR| __REDDIT__/f/pics/110950 |OR| __REDDIT__/f/nyc/130377",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "hard: need to navigate to the appropriate subreddit (based off DC Wharf), identify the correct user, and navigate to their other posts",
"intent_template_id": 12
},
{
"sites": [
"reddit"
],
"task_id": 33,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Find the user who posted this and navigate to the comments section of one of their other posts.",
"intent": "Find the user who posted this and navigate to the comments section of one of their other posts.",
"image": "__REDDIT__/submission_images/7a0c19ac8feaf6539c069a1ddbdda91e491fdacd6980a34d205d94b4fcc08ebb.gif",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/gaming/126203/last-of-us-on-pc-part-ii",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "hard: need to navigate to the appropriate subreddit (based off gaming content), identify the correct user, and navigate to their other posts (__REDDIT__/f/gaming/126063/last-of-us-on-pc)",
"intent_template_id": 12
},
{
"sites": [
"reddit"
],
"task_id": 34,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Navigate to the comments section of the {{ranking}} image post in the {{subreddit}} subreddit that {{attribute}}.",
"intent": "Navigate to the comments section of the all time top ranked image post in the /f/boston subreddit that is a photo of the city.",
"image": null,
"instantiation_dict": {
"ranking": "all time top ranked",
"subreddit": "/f/boston",
"attribute": "is a photo of the city"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/boston/124422",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "medium: need to navigate to the correct subreddit and identify the first post that contains a picture of the city",
"intent_template_id": 13
},
{
"sites": [
"reddit"
],
"task_id": 35,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Navigate to the comments section of the {{ranking}} image post in the {{subreddit}} subreddit that {{attribute}}.",
"intent": "Navigate to the comments section of the latest image post in the /f/washingtondc subreddit that contains trees.",
"image": null,
"instantiation_dict": {
"ranking": "latest",
"subreddit": "/f/washingtondc",
"attribute": "contains trees"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/washingtondc/136819",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "need to navigate to the correct subreddit and identify the first post that contains a picture of trees (second page)",
"intent_template_id": 13
},
{
"sites": [
"reddit"
],
"task_id": 36,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Navigate to the comments section of the {{ranking}} image post in the {{subreddit}} subreddit that {{attribute}}.",
"intent": "Navigate to the comments section of the all time top ranked image post in the /f/nyc subreddit that contains an animal.",
"image": null,
"instantiation_dict": {
"ranking": "all time top ranked",
"subreddit": "/f/nyc",
"attribute": "contains an animal"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/nyc/44619",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "need to navigate to the correct subreddit and identify the top post that contains a picture of animals (there is a lanternfly costume which is a distractor)",
"intent_template_id": 13
},
{
"sites": [
"reddit"
],
"task_id": 37,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Navigate to the comments section of the {{ranking}} image post in the {{subreddit}} subreddit that {{attribute}}.",
"intent": "Navigate to the comments section of the latest image post in the /f/Art subreddit that contains animals.",
"image": null,
"instantiation_dict": {
"ranking": "latest",
"subreddit": "/f/Art",
"attribute": "contains animals"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/Art/116643",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "need to navigate to the correct subreddit and identify the first post that contains a picture of animals (not described in text)",
"intent_template_id": 13
},
{
"sites": [
"reddit"
],
"task_id": 38,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "I'd like to find the subreddit for the city this photo was taken in. Can you navigate to it?",
"intent": "I'd like to find the subreddit for the city this photo was taken in. Can you navigate to it?",
"image": "https://images.pexels.com/photos/11355109/pexels-photo-11355109.jpeg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/jerseycity",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "hard: need to identify that the picture is of Jersey City, and navigate there",
"intent_template_id": 14
},
{
"sites": [
"reddit"
],
"task_id": 39,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "I'd like to find the subreddit for the city this photo was taken in. Can you navigate to it?",
"intent": "I'd like to find the subreddit for the city this photo was taken in. Can you navigate to it?",
"image": "https://images.pexels.com/photos/208745/pexels-photo-208745.jpeg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"fuzzy_match": "N/A"
},
"reference_url": "",
"program_html": [],
"string_note": "San Francisco does not have a subreddit.",
"reference_answer_raw_annotation": "San Francisco does not have a subreddit."
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "Need to identify that the picture is of San Francisco (easy), and figure out that it doesn't have a subreddit",
"intent_template_id": 14
},
{
"sites": [
"reddit"
],
"task_id": 40,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "I'd like to find the subreddit for the city this photo was taken in. Can you navigate to it?",
"intent": "I'd like to find the subreddit for the city this photo was taken in. Can you navigate to it?",
"image": "https://images.pexels.com/photos/18221027/pexels-photo-18221027/free-photo-of-aerial-photo-of-pittsburgh-downtown-skyline.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/pittsburgh",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "Need to identify that the picture is of Pittsburgh (easy/medium), and navigate there",
"intent_template_id": 14
},
{
"sites": [
"reddit"
],
"task_id": 41,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Navigate to the comments section of the first image post in {{subreddit}} that {{attribute}}.",
"intent": "Navigate to the comments section of the first image post in /f/Newark as sorted by hot that does not have people in it.",
"image": null,
"instantiation_dict": {
"subreddit": "/f/Newark as sorted by hot",
"attribute": "does not have people in it"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/Newark/100584",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 15
},
{
"sites": [
"reddit"
],
"task_id": 42,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/forums/all",
"geolocation": null,
"intent_template": "Navigate to the comments section of the first image post in {{subreddit}} that {{attribute}}.",
"intent": "Navigate to the comments section of the first image post in /f/MechanicalKeyboards as sorted by hot that does not have a keyboard in its photo.",
"image": null,
"instantiation_dict": {
"subreddit": "/f/MechanicalKeyboards as sorted by hot",
"attribute": "does not have a keyboard in its photo"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/MechanicalKeyboards/78235",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 15
},
{
"sites": [
"shopping",
"reddit"
],
"task_id": 43,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__SHOPPING__ |AND| __REDDIT__/forums",
"geolocation": null,
"intent_template": "Gather the title of {{product}} from OneStopShop, and post its full product name in the {{subreddit}} subreddit under the title \"{{post_title}}\"",
"instantiation_dict": {
"product": "the cheapest yellow video projector in the \"Video Projectors\" category",
"subreddit": "/f/gadgets",
"post_title": "Has anyone bought this product?"
},
"intent": "Gather the title of the cheapest yellow video projector in the \"Video Projectors\" category from OneStopShop, and post its full product name in the /f/gadgets subreddit under the title \"Has anyone bought this product?\"",
"require_reset": false,
"eval": {
"eval_types": [
"url_match",
"program_html"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/gadgets",
"url_note": "GOLD in PRED",
"program_html": [
{
"url": "func:reddit_get_post_url('__last_url__')",
"locator": "func:get_query_text(__page__, '.submission__title')",
"required_contents": {
"exact_match": "Has anyone bought this product?"
}
},
{
"url": "func:reddit_get_post_url('__last_url__')",
"locator": "func:get_query_text(__page__, '.submission__body')",
"required_contents": {
"must_include": [
"RUIQIMAO Mini Projector Portable 1080P Home Cinema Projector Theater Video 100''Display LCD HiFi Audio for TV Box/Xbox/TF Card/U Disk P2L1"
]
}
}
]
},
"intent_template_id": 16,
"comments": "__SHOPPING__/ruiqimao-mini-projector-portable-1080p-home-cinema-projector-theater-video-100-display-lcd-hifi-audio-for-tv-box-xbox-tf-card-u-disk-p2l1.html",
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard"
},
{
"sites": [
"shopping",
"reddit"
],
"task_id": 44,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__SHOPPING__ |AND| __REDDIT__/forums",
"geolocation": null,
"intent_template": "Find an image of {{product}} from OneStopShop, and post it as an image in the {{subreddit}} subreddit with the title as the name of the piece and the artist's name.",
"instantiation_dict": {
"product": "the most expensive artwork from the Posters & Prints category",
"subreddit": "/f/Art"
},
"intent": "Find an image of the most expensive artwork from the Posters & Prints category from OneStopShop, and post it as an image in the /f/Art subreddit with the title as the name of the piece and the artist's name.",
"require_reset": false,
"eval": {
"eval_types": [
"url_match",
"program_html",
"page_image_query"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/Art",
"url_note": "GOLD in PRED",
"program_html": [
{
"url": "func:reddit_get_post_url('__last_url__')",
"locator": "func:get_query_text(__page__, '.submission__title')",
"required_contents": {
"must_include": [
"Frozen Shores",
"Marion Griese"
]
}
}
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09NRZ3LZW.0.jpg"
}
]
},
"intent_template_id": 17,
"comments": "__SHOPPING__/fine-art-canvas-frozen-shores-canvas-print-artwork-by-marion-griese-45-w-x-60-h-vertical-gallery-wrapped-large-wall-decor.html",
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 45,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/60156 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the total area in square miles, of the states that are ranked as having 150+ endangered species? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the total area in square miles, of the states",
"modifier": "are ranked as having 150+ endangered species"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"561400 |OR| 561,400"
]
}
},
"intent_template_id": 18,
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "Texas (268,596) + Alabama (52,419) + Florida (65,758) + California (163,696) + Hawaii (10,931) = 561400 square miles"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 46,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/103854 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the 2022 total nominal GDP (in billions) of the area that produces the most sugarcane in the year of 2021? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the 2022 total nominal GDP (in billions) of the area",
"modifier": "produces the most sugarcane in the year of 2021"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"39000 |OR| 39,000"
]
}
},
"intent_template_id": 18,
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Asia: 39 trillion"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 47,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/103905 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the area (in km^2) of the country that emitted second most plastic waste into the ocean? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the area (in km^2) of the country",
"modifier": "emitted second most plastic waste into the ocean"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"3287263 |OR| 3,287,263"
]
}
},
"intent_template_id": 18,
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/India: 3,287,263 km^2"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 48,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the absolute difference between NVIDIA's FY22 net income listed in wikipedia and reddit forum 'dataisbeautiful' (in billions)? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the absolute difference between NVIDIA's FY22 net income listed in wikipedia and reddit forum 'dataisbeautiful' (in billions)"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"5.35"
]
}
},
"intent_template_id": 19,
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Nvidia: 9.75B, __REDDIT__/f/dataisbeautiful/124787/oc-nvidia-s-fy22-income-statement: 4.4B"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 49,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/82181 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the first year the Summer Olympics include the women's version of the sport that had the largest average male athlete weight in the 2012 Olympics? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the first year the Summer Olympics include the women's version of the sport",
"modifier": "had the largest average male athlete weight in the 2012 Olympics"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"1976"
]
}
},
"intent_template_id": 18,
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Rowing_(sport): ...The introduction of women's rowing at the 1976 Summer Olympics in Montreal increased..."
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 50,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/18254 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the name of the major commercial airport in the state that is colored green? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the name of the major commercial airport in the state",
"modifier": "is colored green"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"Seattle-Tacoma |OR| Sea-Tac"
]
}
},
"intent_template_id": 18,
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Washington_(state): ... Seattle-Tacoma International Airport (Sea-Tac) is the major commercial airport of greater Seattle..."
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 51,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/18267 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the ISIN of the company that occupies the largest portion in Warren Buffett's portfolio? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the ISIN of the company",
"modifier": "occupies the largest portion in Warren Buffett's portfolio"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"exact_match": "US0378331005"
}
},
"intent_template_id": 18,
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Apple_Inc."
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 52,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/124826 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the largest ethnic group in the country that has the least average number of physicians per person? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the largest ethnic group in the country",
"modifier": "has the least average number of physicians per person"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"exact_match": "Hausa"
}
},
"intent_template_id": 18,
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Niger"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 53,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/60171 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What was the total asset value (in trillions of USD, to 3 decimal places) of the fifth largest employer in China in 2017? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "was the total asset value (in trillions of USD, to 3 decimal places)",
"modifier": "of the fifth largest employer in China in 2017"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"0.585"
]
}
},
"intent_template_id": 20,
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/State_Grid_Corporation_of_China, US$ 585.278 billion (2017)"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 54,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/18215 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "When {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "When did the programming language that has the largest variance in salary first appear? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "did the programming language",
"modifier": "has the largest variance in salary first appear"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"exact_match": "August 1993"
}
},
"intent_template_id": 21,
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/R_(programming_language)"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 55,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/38920 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "Who {{question}}, {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "Who is the first spouse of the second most philanthropic billionaire, measured in percentage of wealth donated in 2022? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the first spouse of the second most philanthropic billionaire",
"modifier": "measured in percentage of wealth donated in 2022"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"exact_match": "Annaliese Witschak"
}
},
"intent_template_id": 22,
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/George_Soros"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 56,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/124820 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What are the three major geographical divisions of the state that has the third highest number of bank failures? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "are the three major geographical divisions of the state",
"modifier": "has the third highest number of bank failures"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"Northern",
"Central",
"Southern",
"Illinois"
]
}
},
"intent_template_id": 18,
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Illinois, Section Divisions"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 57,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/124818 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the sum of the area, in sq km, of the top two countries that have the most uniformly dense transportation network? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the sum of the area, in sq km, of the top two countries",
"modifier": "have the most uniformly dense transportation network"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"387711 |OR| 387,711"
]
}
},
"intent_template_id": 18,
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Belgium: 30689, __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Germany: 357022"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 58,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/38990 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "Who {{question}} {{modifier}}?",
"intent": "Who is the author of the most popular novel adapted anime in year 2012?",
"image": [],
"instantiation_dict": {
"question": "is the author",
"modifier": "of the most popular novel adapted anime in year 2012"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"exact_match": "Reki Kawahara"
}
},
"intent_template_id": 23,
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Sword_Art_Online"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 59,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/39014 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the total employees (in 2021) of the accounting firms that provide both tax and legal services? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the total employees (in 2021) of the accounting firms",
"modifier": "provide both tax and legal services"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"876374 |OR| 876,374"
]
}
},
"intent_template_id": 18,
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Deloitte: 345374, __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/PricewaterhouseCoopers: 295000, __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/KPMG: 236000"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 60,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/82296 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the ISBN of the genre fiction book that takes OP the longest time to read in 2022? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the ISBN of the genre fiction book",
"modifier": "takes OP the longest time to read in 2022"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"exact_match": "0385302304"
}
},
"intent_template_id": 18,
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Outlander_(novel)"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 61,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/103828 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "Who {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "Who directed the movie that is shown as the background picture of the third highest earning movie production companies? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "directed the movie",
"modifier": "is shown as the background picture of the third highest earning movie production companies"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"exact_match": "Sam Mendes"
}
},
"intent_template_id": 24,
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Skyfall"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 62,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/82234 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "Who {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "Who potrayed the character in friends that has the most intimate connections? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "potrayed the character in friends",
"modifier": "has the most intimate connections"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"exact_match": "Lisa Kudrow"
}
},
"intent_template_id": 24,
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Phoebe_Buffay"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 63,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/82193 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the highest elevation (in ft) of the home state of the team that surpassed Buffalo Bills in both defensive rating and offensive rating during the 2022 Season? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the highest elevation (in ft) of the home state of the team",
"modifier": "surpassed Buffalo Bills in both defensive rating and offensive rating during the 2022 Season"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"3213 |OR| 3,213"
]
}
},
"intent_template_id": 18,
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Philadelphia_Eagles"
},
{
"sites": [
"reddit"
],
"task_id": 64,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/60133",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}?",
"intent": "What are the colors of the flag used in the country that is the third largest producer of cherries in year 2012?",
"image": [],
"instantiation_dict": {
"question": "are the colors of the flag used in the country",
"modifier": "is the third largest producer of cherries in year 2012"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"red",
"green",
"white"
]
}
},
"intent_template_id": 25,
"reasoning_difficulty": "easy",
"visual_difficulty": "hard",
"overall_difficulty": "medium",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Italy"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 65,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/82170 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the total city population in 2021 of the two cities that the densest populated in the population density map of Portugal? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the total city population in 2021 of the two cities",
"modifier": "the densest populated in the population density map of Portugal"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"776813 |OR| 776,813"
]
}
},
"intent_template_id": 18,
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Porto (231962), __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Lisbon (544851)"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 66,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/124984 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What {{question}} that {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "What is the third most popular non-english language (as of 2016) in the state that has the deepest snow during 2022-2023 winter? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the third most popular non-english language (as of 2016) in the state",
"modifier": "has the deepest snow during 2022-2023 winter"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"exact_match": "Tagalog"
}
},
"intent_template_id": 18,
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "medium",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/California"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 67,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/60120 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "Who {{question}} that {{modifier}}?",
"intent": "Who is the founder of the brand that has zero stores in Washington state (before 2021) according to the chart in the first tab?",
"image": [],
"instantiation_dict": {
"question": "is the founder of the brand",
"modifier": "has zero stores in Washington state (before 2021) according to the chart in the first tab"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"exact_match": "Bill Rosenberg"
}
},
"intent_template_id": 26,
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Dunkin'_Donuts"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 68,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/dataisbeautiful/124889 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "Who {{question}}, {{modifier}}? Answer using the information from the Wikipedia site in the second tab.",
"intent": "Who is the founder of the second largest asset management firm, as measured by AUM in the end of 2022? Answer using the information from the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"question": "is the founder of the second largest asset management firm",
"modifier": "as measured by AUM in the end of 2022"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"John",
"Bogle"
]
}
},
"intent_template_id": 22,
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/The_Vanguard_Group"
},
{
"sites": [
"reddit"
],
"task_id": 69,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/newhampshire/129011",
"geolocation": null,
"intent_template": "Make a comment in this post {{criteria}}.",
"intent": "Make a comment in this post explaining what the picture is about.",
"image": [],
"instantiation_dict": {
"criteria": "explaining what the picture is about"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/newhampshire/129011",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"fuzzy_match": "The picture is making a joke about New Hampshire being too cold by exaggerating that spring doesn't exist there."
}
}
]
},
"intent_template_id": 27,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": ""
},
{
"sites": [
"reddit"
],
"task_id": 70,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/pics/new",
"geolocation": null,
"intent_template": "How many comments are {{status}} in the post that {{post_modifier}}",
"intent": "How many comments are removed or deleted in the post that has a turkey picture in the hot posts of f/pics",
"image": [],
"instantiation_dict": {
"status": "removed or deleted",
"post_modifier": "has a turkey picture in the hot posts of f/pics"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"16"
]
}
},
"intent_template_id": 28,
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "__REDDIT__/f/pics/45604. Counting is hard."
},
{
"sites": [
"reddit"
],
"task_id": 71,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/memes/new",
"geolocation": null,
"intent_template": "Find me {{description}} of {{object}}. Then, {{action}}.",
"intent": "Find me the most recent animated gif of spiderman in f/memes. Then, leave a comment that describes what the spiderman is doing in the meme.",
"image": [],
"instantiation_dict": {
"description": "the most recent animated gif",
"object": "spiderman in f/memes",
"action": "leave a comment that describes what the spiderman is doing in the meme"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/memes/127908",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"fuzzy_match": "The spiderman is flying."
}
}
]
},
"intent_template_id": 29,
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "__REDDIT__/f/memes/127908, fourth page under chronological order: __REDDIT__/f/memes/new?next%5Bid%5D=127916"
},
{
"sites": [
"reddit"
],
"task_id": 72,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/memes/127531",
"geolocation": null,
"intent_template": "Make a comment in this post {{criteria}}.",
"intent": "Make a comment in this post explaining what the picture is about.",
"image": [],
"instantiation_dict": {
"criteria": "explaining what the picture is about"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/memes/127531",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"fuzzy_match": "The picture is saying that everything is disappointing, regardless if you have a job or are unemployed."
}
}
]
},
"intent_template_id": 27,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": ""
},
{
"sites": [
"reddit"
],
"task_id": 73,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Find a post in {{forum}} that {{description}}. Then, repost the picture to {{forum2}} with {{characteristic}}. Finally, {{action}}.",
"intent": "Find a post in f/MechanicalKeyboards that contains a special keyboard that has a huge W key. Then, repost the picture to f/gaming with title \"WwWwWwWwWwwWw all the way\". Finally, navigate to the post.",
"image": [],
"instantiation_dict": {
"forum": "f/MechanicalKeyboards",
"description": "contains a special keyboard that has a huge W key",
"forum2": "f/gaming",
"characteristic": "title \"WwWwWwWwWwwWw all the way\"",
"action": "navigate to the post"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html",
"page_image_query",
"url_match"
],
"reference_answers": null,
"reference_url": "f/gaming",
"url_note": "GOLD in PRED",
"program_html": [
{
"url": "last",
"locator": "lambda:(() => { try { return document.querySelector('.submission__link').textContent } catch { return '' } })()",
"required_contents": {
"exact_match": "WwWwWwWwWwwWw all the way"
}
}
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_fuzzy_image_match": "__REDDIT__/submission_images/c876b20d6b3b643774e61d6a3c0914a9bc013118c9f10bdc9ff4e940a084f4b9.jpg"
}
]
},
"intent_template_id": 30,
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "__REDDIT__/f/MechanicalKeyboards/14329"
},
{
"sites": [
"reddit"
],
"task_id": 74,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Find me {{description}} of {{object}}. Then, {{action}}.",
"intent": "Find me a keyboard version of what the man is wearing in the picture. Then, count how many white key is on it and post it in the comment section of that post with the following format: \"It has <number of keys> white keys!!\" (not including the double inverted commas), replacing <number of keys> with the actual number of white keys in the picture. Remember that partially occluded keycaps need to be counted also.",
"image": [
"https://images.pexels.com/photos/2955376/pexels-photo-2955376.jpeg"
],
"instantiation_dict": {
"description": "a keyboard version",
"object": "what the man is wearing in the picture",
"action": "count how many white key is on it and post it in the comment section of that post with the following format: \"It has <number of keys> white keys!!\" (not including the double inverted commas), replacing <number of keys> with the actual number of white keys in the picture. Remember that partially occluded keycaps need to be counted also"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/MechanicalKeyboards/56378",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"It has 38 white keys!! |OR| It has 39 white keys!!"
]
}
}
]
},
"intent_template_id": 29,
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "__REDDIT__/f/MechanicalKeyboards/56378 -- there's a very tiny part of a key between the gap in the upper left, so accepting both 38 and 39."
},
{
"sites": [
"reddit",
"shopping"
],
"task_id": 75,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__SHOPPING__ |AND| __REDDIT__",
"geolocation": null,
"intent_template": "How should I {{task}}? Search {{forum}} for the answer and {{action}}.",
"intent": "How should I keep my keyboard clean while eating cheese balls and typing on keyboard at the same time? Search f/MechanicalKeyboards for the answer and buy me the tool on the shopping site.",
"image": [],
"instantiation_dict": {
"task": "keep my keyboard clean while eating cheese balls and typing on keyboard at the same time",
"forum": "f/MechanicalKeyboards",
"action": "buy me the tool on the shopping site"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "func:shopping_get_latest_order_url()",
"locator": "func:shopping_get_order_product_name_list(__page__)",
"required_contents": {
"must_include": [
"Chopsticks"
]
}
}
]
},
"intent_template_id": 31,
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "__REDDIT__/f/MechanicalKeyboards/120949, "
},
{
"sites": [
"reddit"
],
"task_id": 76,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/funny/125907",
"geolocation": null,
"intent_template": "{{question}}? {{action}} and create a comment with the following format: {{comment_format}}.",
"intent": "How much money does the person have? Downvote the post and create a comment with the following format: \"This is a <amount of money (in digits only)> bill from <country>.\" (not including the double inverted commas), replacing <amount of money> with the actual amount of money the person has and <country> with the country that issue the bill.",
"image": [],
"instantiation_dict": {
"question": "How much money does the person have",
"action": "Downvote the post",
"comment_format": "\"This is a <amount of money (in digits only)> bill from <country>.\" (not including the double inverted commas), replacing <amount of money> with the actual amount of money the person has and <country> with the country that issue the bill"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": null,
"program_html": [
{
"url": "__REDDIT__/f/funny/125907",
"locator": "document.querySelector('div.submission__vote').querySelector('form').getAttribute('class')",
"required_contents": {
"must_include": [
"vote vote--user-downvoted"
]
}
},
{
"url": "__REDDIT__/f/funny/125907",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"This is a 20000000000 bill from Zimbabwe. |OR| This is a 20,000,000,000 bill from Zimbabwe."
]
}
}
]
},
"intent_template_id": 32,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/funny/125907"
},
{
"sites": [
"reddit"
],
"task_id": 77,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Navigate to {{object}} that {{description}}.",
"intent": "Navigate to the comments section of any post that contains Mona Lisa in the picture.",
"image": [],
"instantiation_dict": {
"object": "the comments section of any post",
"description": "contains Mona Lisa in the picture"
},
"require_reset": false,
"eval": {
"eval_types": [
"page_image_query"
],
"reference_answers": null,
"reference_url": null,
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_vqa": [
{
"question": "Is Mona Lisa in the picture? (yes/no)",
"answer": "yes"
}
]
}
]
},
"intent_template_id": 33,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/pics/45680"
},
{
"sites": [
"reddit"
],
"task_id": 78,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Navigate to {{object}} that {{description}}.",
"intent": "Navigate to the comments section of any post that contains a silver banana in the picture.",
"image": [],
"instantiation_dict": {
"object": "the comments section of any post",
"description": "contains a silver banana in the picture"
},
"require_reset": false,
"eval": {
"eval_types": [
"page_image_query"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/pics/25213",
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_vqa": [
{
"question": "Is banana in the picture? (yes/no)",
"answer": "yes"
},
{
"question": "Is the banana in the sliver? (yes/no)",
"answer": "yes"
}
]
}
]
},
"intent_template_id": 33,
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/pics/25213"
},
{
"sites": [
"reddit"
],
"task_id": 79,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Navigate to {{object}} that {{description}}.",
"intent": "Navigate to the comments section of any post that contains Rick and Morty in the picture.",
"image": [],
"instantiation_dict": {
"object": "the comments section of any post",
"description": "contains Rick and Morty in the picture"
},
"require_reset": false,
"eval": {
"eval_types": [
"page_image_query"
],
"reference_answers": null,
"reference_url": null,
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_vqa": [
{
"question": "Is Rick and Morty in the picture? (yes/no)",
"answer": "yes"
}
]
}
]
},
"intent_template_id": 33,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/mildlyinteresting/42152"
},
{
"sites": [
"reddit"
],
"task_id": 80,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/mildlyinteresting",
"geolocation": null,
"intent_template": "Upvote all the posts that {{modifier}} on this page.",
"intent": "Upvote all the posts that have a picture which contains food on this page.",
"image": [],
"instantiation_dict": {
"modifier": "have a picture which contains food"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": null,
"program_html": [
{
"url": "__REDDIT__/f/mildlyinteresting/42188",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/mildlyinteresting/63435",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/mildlyinteresting/128010",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/mildlyinteresting/128086",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/mildlyinteresting/128184",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/mildlyinteresting/42138",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/mildlyinteresting/85669",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/mildlyinteresting/42237",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
}
]
},
"intent_template_id": 34,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "Note: Does not check every single post on the page. Picture 2 (42188), 4 (63435), 8 (128010), 24 (128086) has food."
},
{
"sites": [
"reddit"
],
"task_id": 81,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/photoshopbattles",
"geolocation": null,
"intent_template": "Upvote all the posts that {{modifier}} on this page.",
"intent": "Upvote all the posts that have a picture which contains cat on this page.",
"image": [],
"instantiation_dict": {
"modifier": "have a picture which contains cat"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": null,
"program_html": [
{
"url": "__REDDIT__/f/photoshopbattles/45482",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/photoshopbattles/24822",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/photoshopbattles/131315",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/photoshopbattles/89211",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/photoshopbattles/131323",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/photoshopbattles/24783",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/photoshopbattles/67033",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/photoshopbattles/66941",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
}
]
},
"intent_template_id": 34,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "Note: Does not check every single post on the page. Picture 10 (45482), 16 (24822), 18 (131315), 19 (89211) has cat."
},
{
"sites": [
"reddit"
],
"task_id": 82,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/gaming?next%5Branking%5D=2111&next%5Bid%5D=61422",
"geolocation": null,
"intent_template": "Upvote all the posts that {{modifier}} on this page.",
"intent": "Upvote all the posts that have a picture which contains Mario on this page.",
"image": [],
"instantiation_dict": {
"modifier": "have a picture which contains Mario"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": null,
"program_html": [
{
"url": "__REDDIT__/f/gaming/40345",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/gaming/19625",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/gaming/126151",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/gaming/126047",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/gaming/126131",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/gaming/126104",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/gaming/19706",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/gaming/83745",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
}
]
},
"intent_template_id": 34,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "Note: Does not check every single post on the page. Picture 6 (40345), 8 (19625), 23 (126151), 25 (126047) has Mario."
},
{
"sites": [
"reddit"
],
"task_id": 83,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/Art/active",
"geolocation": null,
"intent_template": "Upvote all the posts that {{modifier}} on this page.",
"intent": "Upvote all the posts that have a picture which contains a human or human-like face on this page.",
"image": [],
"instantiation_dict": {
"modifier": "have a picture which contains a human or human-like face"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": null,
"program_html": [
{
"url": "__REDDIT__/f/Art/116512",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/Art/116619",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/Art/116536",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/Art/116517",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/Art/116504",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/Art/116469",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/Art/116444",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/Art/116617",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/Art/116583",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/Art/116508",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/Art/116439",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/Art/116461",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/Art/116595",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/Art/116610",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/Art/116476",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/Art/116538",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
}
]
},
"intent_template_id": 34,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "Note: Does not check every single post on the page. Picture 1 (116512), 2 (116619), 3 (116536), 4, 6 (116517), 9 (116504), 11 (116469), 15 (116444), 16 (116617), 17 (116583), 19 (116508), 21 (116439), 24 (116461) has human face."
},
{
"sites": [
"reddit"
],
"task_id": 84,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/consoles",
"geolocation": null,
"intent_template": "Downvote the posts {{modifier1}} and upvote the posts {{modifier2}} but {{action}}.",
"intent": "Downvote the posts in odd row and upvote the posts in even row but skip the one with a fox icon.",
"image": [],
"instantiation_dict": {
"modifier1": "in odd row",
"modifier2": "in even row",
"action": "skip the one with a fox icon"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": null,
"program_html": [
{
"url": "__REDDIT__/f/consoles/124522",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/consoles/124584",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/consoles/124590",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/consoles/81946",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/consoles/81967",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/consoles/124571",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/consoles/124523",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-downvoted"
}
},
{
"url": "__REDDIT__/f/consoles/81949",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-downvoted"
}
},
{
"url": "__REDDIT__/f/consoles/124520",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-downvoted"
}
},
{
"url": "__REDDIT__/f/consoles/124545",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-downvoted"
}
},
{
"url": "__REDDIT__/f/consoles/103639",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-downvoted"
}
}
]
},
"intent_template_id": 35,
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "Note: Does not check every single post on the page. Upvote: 2 (124522), 4 (124584), 6, 8, 10 (124590), 12, 16, 18 (81946), 20, 22 (81967), 24; Downvote: 1 (124523), 3, 5 (81949), 7, 9, 11 (124520), 13, 15, 17 (124545), 19, 21 (103639), 23, 25. Skip 14 (124571)."
},
{
"sites": [
"reddit",
"wikipedia"
],
"task_id": 85,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/EarthPorn?next%5Branking%5D=18106&next%5Bid%5D=98160 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "Consider {{modifier}}. Downvote the posts {{modifier1}} and upvote the posts {{modifier2}}.",
"intent": "Consider all posts with bodies of water in this page. Downvote the posts that are taken in northern hemisphere and upvote the posts that are taken in the southern hemisphere.",
"image": [],
"instantiation_dict": {
"modifier": "all posts with bodies of water in this page",
"modifier1": "that are taken in northern hemisphere",
"modifier2": "that are taken in the southern hemisphere"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": null,
"program_html": [
{
"url": "__REDDIT__/f/EarthPorn/98160",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-downvoted"
}
},
{
"url": "__REDDIT__/f/EarthPorn/119215",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-downvoted"
}
},
{
"url": "__REDDIT__/f/EarthPorn/119100",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-downvoted"
}
},
{
"url": "__REDDIT__/f/EarthPorn/76382",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-downvoted"
}
},
{
"url": "__REDDIT__/f/EarthPorn/76394",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-downvoted"
}
},
{
"url": "__REDDIT__/f/EarthPorn/76351",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/EarthPorn/76405",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/EarthPorn/76416",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/EarthPorn/76387",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/EarthPorn/54431",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/EarthPorn/119220",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/EarthPorn/119108",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
}
]
},
"intent_template_id": 36,
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "Note: Does not check every single post on the page. north: 1 (98160), 2, 6 (119215), 7, 8, 10, 11 (119100), 12 (76382), 13, 15, 18 (76394), 19, 20, 23; south: 4 (54431), 17 (119220), 21 (119108)"
},
{
"sites": [
"reddit"
],
"task_id": 86,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/space",
"geolocation": null,
"intent_template": "Consider {{modifier}}. What is {{question}}?",
"intent": "Consider all posts in this page. What is the username of the user that posted the picture where the moon takes up the largest proportion of the image?",
"image": [],
"instantiation_dict": {
"modifier": "all posts in this page",
"question": "the username of the user that posted the picture where the moon takes up the largest proportion of the image"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"daryavaseum"
]
},
"reference_url": null
},
"intent_template_id": 37,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "easy",
"comments": "test scrolling. __REDDIT__/f/space/27421"
},
{
"sites": [
"reddit"
],
"task_id": 87,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/aww",
"geolocation": null,
"intent_template": "Consider {{modifier}}. What is {{question}}?",
"intent": "Consider all posts in this page. What is the total number of dogs shown?",
"image": [],
"instantiation_dict": {
"modifier": "all posts in this page",
"question": "the total number of dogs shown"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"9"
]
},
"reference_url": null
},
"intent_template_id": 37,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "test scrolling"
},
{
"sites": [
"reddit"
],
"task_id": 88,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/headphones?next%5Branking%5D=514&next%5Bid%5D=126662",
"geolocation": null,
"intent_template": "Upvote all the posts that {{modifier}} on this page.",
"intent": "Upvote all the posts that do not contain headphones on this page.",
"image": [],
"instantiation_dict": {
"modifier": "do not contain headphones"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": null,
"program_html": [
{
"url": "__REDDIT__/f/headphones/105770",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/headphones/20186",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote vote--user-upvoted"
}
},
{
"url": "__REDDIT__/f/headphones/126662",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/headphones/61993",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/headphones/105934",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
},
{
"url": "__REDDIT__/f/headphones/84223",
"locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')",
"required_contents": {
"exact_match": "vote"
}
}
]
},
"intent_template_id": 34,
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "Note: Does not check every single post on the page. Upvote: 2, 3, 4 (105770), 8 (20186), 14, 18 (126736), 19 (20035), 22, 24 (105847). Ignores __REDDIT__/f/headphones/126736, __REDDIT__/f/headphones/20035, __REDDIT__/f/headphones/105847 for ambiguity (headphones vs. earphones). Test negative selection"
},
{
"sites": [
"reddit"
],
"task_id": 89,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/space/112972",
"geolocation": null,
"intent_template": "{{question}}? Leave a comment with the following format: {{format}}.",
"intent": "How many Jupiter instances are in this picture? Leave a comment with the following format: \"It has <number of Jupiter> Jupiter!!\" (not including the double inverted commas), replacing <number of Jupiter> with the actual number of Jupiter in the picture. Remember to use digits.",
"image": [],
"instantiation_dict": {
"question": "How many Jupiter instances are in this picture",
"format": "\"It has <number of Jupiter> Jupiter!!\" (not including the double inverted commas), replacing <number of Jupiter> with the actual number of Jupiter in the picture. Remember to use digits"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": null,
"program_html": [
{
"url": "__REDDIT__/f/space/112972",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "It has 4 Jupiter!!"
}
}
]
},
"intent_template_id": 38,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "some basic tasks"
},
{
"sites": [
"reddit"
],
"task_id": 90,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/mildlyinteresting/21720",
"geolocation": null,
"intent_template": "{{question}}? Leave a comment with the following format: {{format}}.",
"intent": "How many teeth does the pumpkin have? Leave a comment with the following format: \"It has <number of teeth> teeth :)\" (not including the double inverted commas), replacing <number of teeth> with the actual number of teeth the pumpkin has. Remember to use digits.",
"image": [],
"instantiation_dict": {
"question": "How many teeth does the pumpkin have",
"format": "\"It has <number of teeth> teeth :)\" (not including the double inverted commas), replacing <number of teeth> with the actual number of teeth the pumpkin has. Remember to use digits"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": null,
"program_html": [
{
"url": "__REDDIT__/f/mildlyinteresting/21720",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "It has 4 teeth :)"
}
}
]
},
"intent_template_id": 38,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "some basic tasks"
},
{
"sites": [
"reddit"
],
"task_id": 91,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/mildlyinteresting/42168",
"geolocation": null,
"intent_template": "{{question}}? Leave a comment with the following format: {{format}}.",
"intent": "What is the color of topmost container? Leave a comment with the following format: \"Let's go <color of container>\" (not including the double inverted commas), replacing <color of container> with the actual color of the topmost container. Remember to use basic color names.",
"image": [],
"instantiation_dict": {
"question": "What is the color of topmost container",
"format": "\"Let's go <color of container>\" (not including the double inverted commas), replacing <color of container> with the actual color of the topmost container. Remember to use basic color names"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": null,
"program_html": [
{
"url": "__REDDIT__/f/mildlyinteresting/42168",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "Let's go green"
}
}
]
},
"intent_template_id": 38,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "some basic tasks"
},
{
"sites": [
"reddit"
],
"task_id": 92,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "{{question}} the picture {{modifier}} in {{forum}}?",
"intent": "What is the animal in the picture from the most active post in f/aww?",
"image": [],
"instantiation_dict": {
"question": "What is the animal in",
"modifier": "from the most active post",
"forum": "f/aww"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"dog"
]
}
},
"intent_template_id": 39,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "some basic tasks"
},
{
"sites": [
"reddit"
],
"task_id": 93,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "{{question}} the picture {{modifier}} in {{forum}}?",
"intent": "What is the color of the plate used for the picture from the most controversial post of all time in f/food?",
"image": [],
"instantiation_dict": {
"question": "What is the color of the plate used for",
"modifier": "from the most controversial post of all time",
"forum": "f/food"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"white"
]
}
},
"intent_template_id": 39,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "some basic tasks. __REDDIT__/f/food/controversial?t=all"
},
{
"sites": [
"reddit"
],
"task_id": 94,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "{{question}} the picture {{modifier}} in {{forum}}?",
"intent": "Is there snow in the picture from the hottest post in f/EarthPorn?",
"image": [],
"instantiation_dict": {
"question": "Is there snow in",
"modifier": "from the hottest post",
"forum": "f/EarthPorn"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"yes"
]
}
},
"intent_template_id": 39,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "some basic tasks"
},
{
"sites": [
"reddit"
],
"task_id": 95,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "{{question}} the picture {{modifier}} in {{forum}}?",
"intent": "What is the color of the snow near the Christmas tree in the picture from the second hottest post in f/newhaven?",
"image": [],
"instantiation_dict": {
"question": "What is the color of the snow near the Christmas tree in",
"modifier": "from the second hottest post",
"forum": "f/newhaven"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"purple |OR| pink"
]
}
},
"intent_template_id": 39,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/newhaven/64803/first-snowfall-of-the-season-on-the-green"
},
{
"sites": [
"reddit"
],
"task_id": 96,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "{{question}} the picture {{modifier}} in {{forum}}?",
"intent": "Who authored the post shown in the picture (not the reddit author) of the most commented post of all time in f/Newark?",
"image": [],
"instantiation_dict": {
"question": "Who authored the post shown in",
"modifier": "(not the reddit author) of the most commented post of all time",
"forum": "f/Newark"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"Councilman Michael Silva"
]
}
},
"intent_template_id": 39,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "some basic tasks"
},
{
"sites": [
"reddit"
],
"task_id": 97,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "{{question}} the picture {{modifier}} in {{forum}}?",
"intent": "What is the color of the radio in the picture from the newest post in f/BuyItForLife?",
"image": [],
"instantiation_dict": {
"question": "What is the color of the radio in",
"modifier": "from the newest post",
"forum": "f/BuyItForLife"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"white"
]
}
},
"intent_template_id": 39,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "some basic tasks"
},
{
"sites": [
"reddit"
],
"task_id": 98,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Navigate to the comments section of a post that {{condition}}.",
"intent": "Navigate to the comments section of a post that contains a picture of a dog.",
"image": [],
"instantiation_dict": {
"condition": "contains a picture of a dog"
},
"require_reset": false,
"eval": {
"eval_types": [
"page_image_query"
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_vqa": [
{
"question": "Does the picture contain a dog? (yes/no)",
"answer": "yes"
}
]
}
]
},
"intent_template_id": 40,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "just search \"dog picture\""
},
{
"sites": [
"reddit"
],
"task_id": 99,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Navigate to the comments section of a post that {{condition}}.",
"intent": "Navigate to the comments section of a post that contains a picture of a dog and a cat.",
"image": [],
"instantiation_dict": {
"condition": "contains a picture of a dog and a cat"
},
"require_reset": false,
"eval": {
"eval_types": [
"page_image_query"
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_vqa": [
{
"question": "Does the picture contain a dog? (yes/no)",
"answer": "yes"
},
{
"question": "Does the picture contain a cat? (yes/no)",
"answer": "yes"
}
]
}
]
},
"intent_template_id": 40,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "just search \"dog cat picture\""
},
{
"sites": [
"reddit"
],
"task_id": 100,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Navigate to the comments section of a post that {{condition}}.",
"intent": "Navigate to the comments section of a post that contains a picture of a cake.",
"image": [],
"instantiation_dict": {
"condition": "contains a picture of a cake"
},
"require_reset": false,
"eval": {
"eval_types": [
"page_image_query"
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_vqa": [
{
"question": "Does the picture contain a cake? (yes/no)",
"answer": "yes"
}
]
}
]
},
"intent_template_id": 40,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "just search \"cake\""
},
{
"sites": [
"reddit"
],
"task_id": 101,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Navigate to the comments section of a post that {{condition}}.",
"intent": "Navigate to the comments section of a post that contains a picture of a keyboard.",
"image": [],
"instantiation_dict": {
"condition": "contains a picture of a keyboard"
},
"require_reset": false,
"eval": {
"eval_types": [
"page_image_query"
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_vqa": [
{
"question": "Does the picture contain a keyboard? (yes/no)",
"answer": "yes"
}
]
}
]
},
"intent_template_id": 40,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "just search \"keyboard\""
},
{
"sites": [
"reddit"
],
"task_id": 102,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/MechanicalKeyboards/56362",
"geolocation": null,
"intent_template": "{{question}}? Leave a comment replying to user \"{{user}}\" with the following format: {{format}}.",
"intent": "How many red keys are in the picture? Leave a comment replying to user \"liverblow\" with the following format: \"It has <number of red key> $%@\" (not including the double inverted commas), replacing <number of red key> with the actual number of red key. Remember to use digits.",
"image": [],
"instantiation_dict": {
"question": "How many red keys are in the picture",
"user": "liverblow",
"format": "\"It has <number of red key> $%@\" (not including the double inverted commas), replacing <number of red key> with the actual number of red key. Remember to use digits"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/MechanicalKeyboards/56362",
"locator": "func:reddit_get_parent_comment_username_of_latest_comment_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "liverblow"
}
},
{
"url": "__REDDIT__/f/MechanicalKeyboards/56362",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "It has 3 $%@"
}
}
]
},
"intent_template_id": 41,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "easy",
"comments": ""
},
{
"sites": [
"reddit"
],
"task_id": 103,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/memes/41674",
"geolocation": null,
"intent_template": "{{question}}? Leave a comment replying to user \"{{user}}\" with the following format: {{format}}.",
"intent": "What is the color of the mattress? Leave a comment replying to user \"BiAdventureTime\" with the following format: \"Its color is <color of the mattress> ~~~~``~~~~\" (not including the quote marks), replacing <color of the mattress> with the actual color of the mattress. Remember to use basic color names.",
"image": [],
"instantiation_dict": {
"question": "What is the color of the mattress",
"user": "BiAdventureTime",
"format": "\"Its color is <color of the mattress> ~~~~``~~~~\" (not including the quote marks), replacing <color of the mattress> with the actual color of the mattress. Remember to use basic color names"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/memes/41674",
"locator": "func:reddit_get_parent_comment_username_of_latest_comment_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "BiAdventureTime"
}
},
{
"url": "__REDDIT__/f/memes/41674",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "Its color is blue ~~~~``~~~~"
}
}
]
},
"intent_template_id": 41,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": ""
},
{
"sites": [
"reddit"
],
"task_id": 104,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/memes/21059",
"geolocation": null,
"intent_template": "{{question}}? Leave a comment replying to user \"{{user}}\" with the following format: {{format}}.",
"intent": "How many kirbies are in the picture? Leave a comment replying to user \"Valenyn\" with the following format: \"There are <number of kirbies> kirbies *\uff65\u309c\uff9f\uff65*:.\uff61..\uff61.:*\uff65'(*\uff9f\u25bd\uff9f*)'\uff65*:.\uff61. .\uff61.:*\uff65\u309c\uff9f\uff65*\" (not including the quote marks), replacing <number of kirbies> with the actual number of kirbies. Remember to use digits.",
"image": [],
"instantiation_dict": {
"question": "How many kirbies are in the picture",
"user": "Valenyn",
"format": "\"There are <number of kirbies> kirbies *\uff65\u309c\uff9f\uff65*:.\uff61..\uff61.:*\uff65'(*\uff9f\u25bd\uff9f*)'\uff65*:.\uff61. .\uff61.:*\uff65\u309c\uff9f\uff65*\" (not including the quote marks), replacing <number of kirbies> with the actual number of kirbies. Remember to use digits"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/memes/21059",
"locator": "func:reddit_get_parent_comment_username_of_latest_comment_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "Valenyn"
}
},
{
"url": "__REDDIT__/f/memes/21059",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "There are 3 kirbies *\uff65\u309c\uff9f\uff65*:.\uff61..\uff61.:*\uff65'(*\uff9f\u25bd\uff9f*)'\uff65*:.\uff61. .\uff61.:*\uff65\u309c\uff9f\uff65*"
}
}
]
},
"intent_template_id": 41,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": ""
},
{
"sites": [
"reddit"
],
"task_id": 105,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Subscribe to a forum that contains {{picture}}.",
"intent": "Subscribe to a forum that contains one of the following pictures.",
"image": [
"__REDDIT__/submission_images/73dc8340f062f2514d681f058f166291f996308c853b0991b1d70aeec290bc3f.png",
"__REDDIT__/submission_images/c3e0f19b9c34fb663c059e6001b96c416d67f95e74ef923c3814effe734d1ffe.jpg"
],
"instantiation_dict": {
"picture": "one of the following pictures"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__",
"locator": "func:get_query_text(__page__, '.sidebar')",
"required_contents": {
"must_include": [
"gaming |OR| mildlyinteresting"
]
}
}
]
},
"intent_template_id": 42,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/126091"
},
{
"sites": [
"reddit"
],
"task_id": 106,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Subscribe to a forum that contains {{picture}}.",
"intent": "Subscribe to a forum that contains the following picture.",
"image": "__REDDIT__/submission_images/1479b251ab514f120dc5049645aebb24c1ffb22c6a7c358322f8f7e901caa330.png",
"instantiation_dict": {
"picture": "the following picture"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__",
"locator": "func:get_query_text(__page__, '.sidebar')",
"required_contents": {
"must_include": [
"dataisbeautiful"
]
}
}
]
},
"intent_template_id": 42,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/39053"
},
{
"sites": [
"reddit"
],
"task_id": 107,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Subscribe to a forum that contains {{picture}}.",
"intent": "Subscribe to a forum that contains the following picture.",
"image": [
"__REDDIT__/submission_images/c35e8ec31c5d6749de609413a87e2cacc00a5b5173fdd2c32bee563539c95349.jpg"
],
"instantiation_dict": {
"picture": "the following picture"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__",
"locator": "func:get_query_text(__page__, '.sidebar')",
"required_contents": {
"must_include": [
"iphone"
]
}
}
]
},
"intent_template_id": 42,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/41010"
},
{
"sites": [
"reddit"
],
"task_id": 108,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Send a direct message to the user that created the post that contains the following picture.",
"intent": "Send a direct message to the user that created the post that contains the following picture.",
"image": [
"__REDDIT__/submission_images/56770deac1bc216e20097184d3b5710708cc72cc3bb8ed00883066f2815020cd.jpg"
],
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/messages",
"locator": "func:get_query_text(__page__, '.table')",
"required_contents": {
"must_include": [
"J3RRYLIKESCHEESE"
]
}
}
]
},
"intent_template_id": 43,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/27284"
},
{
"sites": [
"reddit"
],
"task_id": 109,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Send a direct message to the user that created the post that contains the following picture.",
"intent": "Send a direct message to the user that created the post that contains the following picture.",
"image": [
"__REDDIT__/submission_images/e44dc6ea011d1876ea96210b4aae47f8dc334dd975f53b6fa902535a0748bee4.jpg"
],
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/messages",
"locator": "func:get_query_text(__page__, '.table')",
"required_contents": {
"must_include": [
"Throwaway0114845892"
]
}
}
]
},
"intent_template_id": 43,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/107539"
},
{
"sites": [
"reddit"
],
"task_id": 110,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Send a direct message to the user that created the post that contains the following picture.",
"intent": "Send a direct message to the user that created the post that contains the following picture.",
"image": [
"__REDDIT__/submission_images/0e2afc6ce7d06b708f04ece3b0481d64aed4deab399c13e6eb9343525c730993.png"
],
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/messages",
"locator": "func:get_query_text(__page__, '.table')",
"required_contents": {
"must_include": [
"lostinam3rica"
]
}
}
]
},
"intent_template_id": 43,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/38612"
},
{
"sites": [
"reddit"
],
"task_id": 111,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Create a post for each of the following image in the most related forum.",
"intent": "Create a post for each of the following image in the most related forum.",
"image": [
"https://images.pexels.com/photos/2539395/pexels-photo-2539395.jpeg",
"https://images.pexels.com/photos/16194823/pexels-photo-16194823/free-photo-of-people-in-front-of-quincy-market-in-usa.jpeg"
],
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"page_image_query"
],
"reference_answers": null,
"page_image_query": [
{
"eval_image_url": "__REDDIT__/f/Washington/top?t=day",
"eval_image_class": ".submission__title",
"eval_fuzzy_image_match": "https://images.pexels.com/photos/2539395/pexels-photo-2539395.jpeg",
"ssim_threshold": 0.6
},
{
"eval_image_url": "__REDDIT__/f/boston/top?t=day",
"eval_image_class": ".submission__title",
"eval_fuzzy_image_match": "https://images.pexels.com/photos/16194823/pexels-photo-16194823/free-photo-of-people-in-front-of-quincy-market-in-usa.jpeg",
"ssim_threshold": 0.6
}
]
},
"intent_template_id": 44,
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "/f/Washington, /f/boston"
},
{
"sites": [
"reddit"
],
"task_id": 112,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Create a post for each of the following image in the most related forum.",
"intent": "Create a post for each of the following image in the most related forum.",
"image": [
"https://images.pexels.com/photos/2150/sky-space-dark-galaxy.jpg",
"https://images.pexels.com/photos/1037992/pexels-photo-1037992.jpeg"
],
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"page_image_query"
],
"reference_answers": null,
"page_image_query": [
{
"eval_image_url": "__REDDIT__/f/space/top?t=day",
"eval_image_class": ".submission__title",
"eval_fuzzy_image_match": "https://images.pexels.com/photos/2150/sky-space-dark-galaxy.jpg",
"ssim_threshold": 0.6
},
{
"eval_image_url": "__REDDIT__/f/headphones/top?t=day",
"eval_image_class": ".submission__title",
"eval_fuzzy_image_match": "https://images.pexels.com/photos/1037992/pexels-photo-1037992.jpeg",
"ssim_threshold": 0.6
}
]
},
"intent_template_id": 44,
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "/f/space, /f/headphones"
},
{
"sites": [
"reddit"
],
"task_id": 113,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Create a post for each of the following image in the most related forum.",
"intent": "Create a post for each of the following image in the most related forum.",
"image": [
"https://images.pexels.com/photos/417074/pexels-photo-417074.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1",
"https://images.pexels.com/photos/2706653/pexels-photo-2706653.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1",
"https://images.pexels.com/photos/7947958/pexels-photo-7947958.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1"
],
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"page_image_query"
],
"reference_answers": null,
"page_image_query": [
{
"eval_image_url": "__REDDIT__/f/EarthPorn/top?t=day",
"eval_image_class": ".submission__title",
"eval_fuzzy_image_match": "https://images.pexels.com/photos/417074/pexels-photo-417074.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1",
"ssim_threshold": 0.6
},
{
"eval_image_url": "__REDDIT__/f/nyc/top?t=day",
"eval_image_class": ".submission__title",
"eval_fuzzy_image_match": "https://images.pexels.com/photos/2706653/pexels-photo-2706653.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1",
"ssim_threshold": 0.6
},
{
"eval_image_url": "__REDDIT__/f/dataisbeautiful/top?t=day",
"eval_image_class": ".submission__title",
"eval_fuzzy_image_match": "https://images.pexels.com/photos/7947958/pexels-photo-7947958.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1",
"ssim_threshold": 0.6
}
]
},
"intent_template_id": 44,
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "/f/EarthPorn, /f/nyc, /f/dataisbeautiful"
},
{
"sites": [
"reddit"
],
"task_id": 114,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Navigate to the {{status}} across {{forums}}. {{question}}?",
"intent": "Navigate to the second most active post with a picture across f/washingtondc, f/vermont and f/nyc. What is in the picture?",
"image": [],
"instantiation_dict": {
"status": "second most active post with a picture",
"forums": "f/washingtondc, f/vermont and f/nyc",
"question": "What is in the picture"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match",
"string_match"
],
"reference_answers": {
"must_include": [
"firetruck |OR| fire engine"
]
},
"reference_url": "__REDDIT__/f/washingtondc/136747",
"url_note": "GOLD in PRED"
},
"intent_template_id": 45,
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "sol1: __REDDIT__/f/washingtondc+vermont+nyc/active?t=all; sol2: subscribe/unsubscribe to related forum and the result will show in homepage"
},
{
"sites": [
"reddit"
],
"task_id": 115,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Navigate to the {{status}} across {{forums}}. {{question}}?",
"intent": "Navigate to the most controversial post of all time (with a picture) across f/Paterson, f/WaterburyCT and f/monitor. What operating system is the person using (windows/ubuntu/arch/macos)?",
"image": [],
"instantiation_dict": {
"status": "most controversial post of all time (with a picture)",
"forums": "f/Paterson, f/WaterburyCT and f/monitor",
"question": "What operating system is the person using (windows/ubuntu/arch/macos)"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match",
"string_match"
],
"reference_answers": {
"must_include": [
"windows"
]
},
"reference_url": "__REDDIT__/f/monitor/107539",
"url_note": "GOLD in PRED"
},
"intent_template_id": 45,
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "sol1: __REDDIT__/f/monitor+Paterson+WaterburyCT/controversial?t=all; sol2: subscribe/unsubscribe to related forum and the result will show in homepage"
},
{
"sites": [
"reddit"
],
"task_id": 116,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Navigate to the {{status}} across {{forums}}. {{question}}?",
"intent": "Navigate to the most commented post of all time (with a picture) across f/aww and f/dataisbeautiful. Does the picture contain an animal (yes/no)?",
"image": [],
"instantiation_dict": {
"status": "most commented post of all time (with a picture)",
"forums": "f/aww and f/dataisbeautiful",
"question": "Does the picture contain an animal (yes/no)"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match",
"string_match"
],
"reference_answers": {
"must_include": [
"yes"
]
},
"reference_url": "__REDDIT__/f/aww/80903",
"url_note": "GOLD in PRED"
},
"intent_template_id": 45,
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "sol1: __REDDIT__/f/aww+dataisbeautiful/most_commented?t=all; sol2: subscribe/unsubscribe to related forum and the result will show in homepage"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 117,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/memes/85200 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What is the birth year of the {{character}}? Answer using the Wikipedia site in the second tab.",
"intent": "What is the birth year of the dog with pink hair? Answer using the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"character": "dog with pink hair"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"2005"
]
}
},
"intent_template_id": 46,
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Doge_(meme)"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 118,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/memes/106760 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What is the birth year of the {{character}}? Answer using the Wikipedia site in the second tab.",
"intent": "What is the birth year of the guy in red jacket? Answer using the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"character": "guy in red jacket"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"1986"
]
}
},
"intent_template_id": 46,
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Drake_(musician)"
},
{
"sites": [
"wikipedia",
"reddit"
],
"task_id": 119,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/memes/62937 |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
"geolocation": null,
"intent_template": "What is the birth year of the {{character}}? Answer using the Wikipedia site in the second tab.",
"intent": "What is the birth year of the guy holding the goblet? Answer using the Wikipedia site in the second tab.",
"image": [],
"instantiation_dict": {
"character": "guy holding the goblet"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"1974"
]
}
},
"intent_template_id": 46,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Leonardo_DiCaprio"
},
{
"sites": [
"reddit"
],
"task_id": 120,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/submission_images/361ec602ee63f8d052217fe6577e7163471ae1b86280acd498aa626a28239da2.jpg",
"geolocation": null,
"intent_template": "According to the image, {{question}}?",
"intent": "According to the image, which country in this picture has the worst rail system?",
"image": [],
"instantiation_dict": {
"question": "which country in this picture has the worst rail system"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"U.S. |OR| United States |OR| US"
]
}
},
"intent_template_id": 47,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/memes/106690"
},
{
"sites": [
"reddit"
],
"task_id": 121,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "I'm doing some research about the post with this image. Navigate to the recipe posted by the OP in the comments.",
"intent": "I'm doing some research about the post with this image. Navigate to the recipe posted by the OP in the comments.",
"image": "__REDDIT__/submission_images/026347b13e015519ffdb759fdb368c33ba7ada80b66e4ff8947fcc9cd3c41433.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/food/125525/-/comment/2481750",
"program_html": [],
"url_note": "EXACT"
},
"intent_template_id": 48,
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "__REDDIT__/f/food/125525/homemade-margherita-pizza"
},
{
"sites": [
"reddit"
],
"task_id": 122,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "I'm doing some research about the post with this image. Navigate to the recipe posted by the OP in the comments.",
"intent": "I'm doing some research about the post with this image. Navigate to the recipe posted by the OP in the comments.",
"image": "__REDDIT__/submission_images/44269ef7e842aaa07cbe1de220cd59dd2161be3f445f6ca913da8fee650e438f.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/food/18967/-/comment/114718",
"program_html": [],
"url_note": "EXACT"
},
"intent_template_id": 48,
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "__REDDIT__/f/food/18967/homemade-korean-fried-chicken"
},
{
"sites": [
"reddit"
],
"task_id": 123,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Find me a post in {{forum}} that illustrates the {{company}} revenue in {{datetime}}, and tell me the revenue of {{department}} (in billions).",
"intent": "Find me a post in /f/wallstreetbets that illustrates the Amazon revenue in Q3 FY22, and tell me the revenue of the online store in Q3 FY22 (in billions).",
"image": null,
"instantiation_dict": {
"forum": "/f/wallstreetbets",
"company": "Amazon",
"datetime": "Q3 FY22",
"department": "the online store in Q3 FY22"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match",
"url_match"
],
"reference_answers": {
"must_include": [
"53.4"
]
},
"reference_url": "__REDDIT__/f/wallstreetbets/29340 |OR| __REDDIT__/f/wallstreetbets/29340/visual-breakdown-of-amzn-q3-fy22-income-statement"
},
"intent_template_id": 49,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/wallstreetbets/29340"
},
{
"sites": [
"reddit"
],
"task_id": 124,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Find me a post in {{forum}} that illustrates the {{company}} revenue in {{datetime}}, and tell me the revenue of {{department}} (in billions).",
"intent": "Find me a post in /f/dataisbeautiful that illustrates the Microsoft revenue in 1985, and tell me the revenue of the whole company in 1985 (in billions).",
"image": null,
"instantiation_dict": {
"forum": "/f/dataisbeautiful",
"company": "Microsoft",
"datetime": "1985",
"department": "the whole company in 1985"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match",
"url_match"
],
"reference_answers": {
"must_include": [
"0.14"
]
},
"reference_url": "__REDDIT__/f/dataisbeautiful/18389 |OR| __REDDIT__/f/dataisbeautiful/18389/oc-microsoft-ipo-vs-now-breaking-down-its-revenue-and"
},
"intent_template_id": 49,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/dataisbeautiful/18389"
},
{
"sites": [
"reddit"
],
"task_id": 125,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Find me a post in {{forum}} that illustrates the {{company}} revenue in {{datetime}}, and tell me the revenue of {{department}} (in billions).",
"intent": "Find me a post in /f/dataisbeautiful that illustrates the Nvidia revenue in FY22, and tell me the revenue of the whole company in FY22 (in billions).",
"image": null,
"instantiation_dict": {
"forum": "/f/dataisbeautiful",
"company": "Nvidia",
"datetime": "FY22",
"department": "the whole company in FY22"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match",
"url_match"
],
"reference_answers": {
"must_include": [
"27"
]
},
"reference_url": "__REDDIT__/f/dataisbeautiful/124787 |OR| __REDDIT__/f/dataisbeautiful/124787/oc-nvidia-s-fy22-income-statement"
},
"intent_template_id": 49,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/dataisbeautiful/124787"
},
{
"sites": [
"reddit"
],
"task_id": 126,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you navigate me to the most commented post by the author of this image? {{hint}}",
"intent": "Can you navigate me to the most commented post by the author of this image? I think it is in the /f/memes forum.",
"image": "__REDDIT__/submission_images/84eb6034d17330ca0f528293ed68c9106639a5e616c5d88b80503da22bf5ab24.jpg",
"instantiation_dict": {
"hint": "I think it is in the /f/memes forum."
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/photoshopbattles/24855 |OR| __REDDIT__/f/photoshopbattles/24855/psbattle-this-dog-and-her-pups",
"program_html": [],
"url_note": "EXACT"
},
"intent_template_id": 50,
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "__REDDIT__/f/memes/85103"
},
{
"sites": [
"reddit"
],
"task_id": 127,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you navigate me to the most commented post by the author of this image? {{hint}}",
"intent": "Can you navigate me to the most commented post by the author of this image? I think it is in the /f/memes forum.",
"image": "__REDDIT__/submission_images/7dff99a65aa39d065b1fb284182be5fd60403d42c5fcac48ac89ff04426cf2b3.png",
"instantiation_dict": {
"hint": "I think it is in the /f/memes forum."
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/memes/41719 |OR| __REDDIT__/f/memes/41719/where-my-6-4-gb",
"program_html": [],
"url_note": "EXACT"
},
"intent_template_id": 50,
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "__REDDIT__/f/memes/41719/where-my-6-4-gb"
},
{
"sites": [
"reddit"
],
"task_id": 128,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you navigate me to the most commented post by the author of this image? {{hint}}",
"intent": "Can you navigate me to the most commented post by the author of this image? I think it is in the /f/pics forum.",
"image": "__REDDIT__/submission_images/39d43f006044f9ec1642678ceaa0eb97cd45e4dc6132a34c8a3c7497f5b97670.jpg",
"instantiation_dict": {
"hint": "I think it is in the /f/pics forum."
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/pics/25017 |OR| __REDDIT__/f/pics/25017/my-oil-painting-of-mcdonald-s-filet-o-fish",
"program_html": [],
"url_note": "EXACT"
},
"intent_template_id": 50,
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "__REDDIT__/f/pics/45579"
},
{
"sites": [
"reddit"
],
"task_id": 129,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Subscribe to the forum which is most likely to include discussions related to the image.",
"intent": "Subscribe to the forum which is most likely to include discussions related to the image.",
"image": "https://images.pexels.com/photos/671629/pexels-photo-671629.jpeg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__",
"locator": "document.querySelector('.sidebar').textContent",
"required_contents": {
"must_include": [
"MechanicalKeyboards"
]
}
}
]
},
"intent_template_id": 51,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": ""
},
{
"sites": [
"reddit"
],
"task_id": 130,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Subscribe to the forum which is most likely to include discussions related to the image.",
"intent": "Subscribe to the forum which is most likely to include discussions related to the image.",
"image": "__REDDIT__/submission_images/e9e9da852e390b2f350dba55990f012352679d753034b3a3cb084f9eb9f9d44c.jpg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__",
"locator": "document.querySelector('.sidebar').textContent",
"required_contents": {
"must_include": [
"memes"
]
}
}
]
},
"intent_template_id": 51,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": ""
},
{
"sites": [
"reddit"
],
"task_id": 131,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Subscribe to the forum which is most likely to include discussions related to the image.",
"intent": "Subscribe to the forum which is most likely to include discussions related to the image.",
"image": "https://images.pexels.com/photos/6801874/pexels-photo-6801874.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__",
"locator": "document.querySelector('.sidebar').textContent",
"required_contents": {
"must_include": [
"wallstreetbets |OR| personalfinance"
]
}
}
]
},
"intent_template_id": 51,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": ""
},
{
"sites": [
"reddit"
],
"task_id": 132,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "How many comments did the post with this image receive? I remember it being one of the top 50 hot posts of {{forum}}.",
"intent": "How many comments did the post with this image receive? I remember it being one of the top 50 hot posts of /f/nyc.",
"image": "__REDDIT__/submission_images/a6728cae79355e9d47e9f6531c5af8c2801ae976a7c62c73a3bd1711bb44ac17.jpg",
"instantiation_dict": {
"forum": "/f/nyc"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"146"
]
}
},
"intent_template_id": 52,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/nyc/88264"
},
{
"sites": [
"reddit"
],
"task_id": 133,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "How many comments did the post with this image receive? I remember it being one of the top 50 hot posts of {{forum}}.",
"intent": "How many comments did the post with this image receive? I remember it being one of the top 50 hot posts of /f/ColumbiaMD.",
"image": "__REDDIT__/submission_images/16da5c8bc5b5a22446d5f8a8d10776ea9b99cb89a215f61d7fb806eb8b0cd28e.png",
"instantiation_dict": {
"forum": "/f/ColumbiaMD"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"23"
]
}
},
"intent_template_id": 52,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/ColumbiaMD/75767"
},
{
"sites": [
"reddit"
],
"task_id": 134,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "How many comments did the post with this image receive? I remember it being one of the top 50 hot posts of {{forum}}.",
"intent": "How many comments did the post with this image receive? I remember it being one of the top 50 hot posts of /f/Maine.",
"image": "__REDDIT__/submission_images/3d8439cefc3104ad519424a4f5d7df87b67abfdbcbdd9151b826538d6438d5e2.jpg",
"instantiation_dict": {
"forum": "/f/Maine"
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"29"
]
}
},
"intent_template_id": 52,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/Maine/120773"
},
{
"sites": [
"reddit"
],
"task_id": 135,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you search for '{{term}}' and find the post with {{number}} upvotes and includes a meme of {{item}}?",
"intent": "Can you search for 'meme' and find the post with 2 upvotes and includes a meme of three Spider-Men, a flag of the Netherlands, and a flag of Croatia?",
"image": null,
"instantiation_dict": {
"term": "meme",
"number": "2",
"item": "three Spider-Men, a flag of the Netherlands, and a flag of Croatia"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/memes/63334 |OR| __REDDIT__/f/memes/63334/if-i-missed-any-flags-that-look-like-these-feel-free-to-add",
"program_html": [],
"url_note": "EXACT"
},
"intent_template_id": 53,
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "__REDDIT__/f/memes/63334"
},
{
"sites": [
"reddit"
],
"task_id": 136,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you navigate to the post with {{number}} upvotes, which includes a meme of {{item}}? {{hint}}",
"intent": "Can you navigate to the post with 307 upvotes, which includes a meme of a dog sitting in a room that is engulfed in flames and saying it is fine? I think this was posted in /f/wallstreetbets.",
"image": null,
"instantiation_dict": {
"hint": "I think this was posted in /f/wallstreetbets.",
"number": "307",
"item": "a dog sitting in a room that is engulfed in flames and saying it is fine"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/wallstreetbets/136131 |OR| __REDDIT__/f/wallstreetbets/136131/meirl",
"program_html": [],
"url_note": "EXACT"
},
"intent_template_id": 54,
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "__REDDIT__/f/wallstreetbets/136131"
},
{
"sites": [
"reddit"
],
"task_id": 137,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you navigate to the post with {{number}} upvotes, which includes a meme of {{item}}? {{hint}}",
"intent": "Can you navigate to the post with 128 upvotes, which includes a meme of Squidward watching SpongeBob and Patrick through a window? I think this was posted in /f/wallstreetbets.",
"image": null,
"instantiation_dict": {
"hint": "I think this was posted in /f/wallstreetbets.",
"number": "128",
"item": "Squidward watching SpongeBob and Patrick through a window"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/wallstreetbets/50264 |OR| __REDDIT__/f/wallstreetbets/50264/money-vacuum-go-shhloooop",
"program_html": [],
"url_note": "EXACT"
},
"intent_template_id": 54,
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "__REDDIT__/f/wallstreetbets/50264"
},
{
"sites": [
"reddit"
],
"task_id": 138,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Change my username to the first name of the recipient in the image.",
"intent": "Change my username to the first name of the recipient in the image.",
"image": "https://images.pexels.com/photos/6347512/pexels-photo-6347512.jpeg",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__",
"locator": "func:get_query_text(__page__, '.site-nav')",
"required_contents": {
"must_include": [
"patrick"
]
}
}
]
},
"intent_template_id": 55,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "easy",
"comments": ""
},
{
"sites": [
"reddit"
],
"task_id": 139,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "I {{interaction}}. Can you help me {{action}}?",
"intent": "I don't like the author of this image from one of the hot posts in /f/memes. Can you help me block them?",
"image": "__REDDIT__/submission_images/43a7a1ca5d08b16fcb11926a7bcc4b16476ed8e62a55bdd917b1020f2510f2e2.jpg",
"instantiation_dict": {
"interaction": "don't like the author of this image from one of the hot posts in /f/memes",
"action": "block them"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/user/MarvelsGrantMan136/block_list",
"locator": "func:get_query_text(__page__, '.site-content')",
"required_contents": {
"must_include": [
"Jamminmb"
]
}
}
]
},
"intent_template_id": 56,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/memes/41741"
},
{
"sites": [
"reddit"
],
"task_id": 140,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "I {{interaction}}. Can you help me {{action}}?",
"intent": "I would like to know the author of this image, from one of the top posts in /f/pics. Can you help me send them a dm with \"Hello\"?",
"image": "__REDDIT__/submission_images/6bfbf1280d28d84a9261695f0cac5a90addaaff1174807a2b381fdc159f3ed00.jpg",
"instantiation_dict": {
"interaction": "would like to know the author of this image, from one of the top posts in /f/pics",
"action": "send them a dm with \"Hello\""
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/messages",
"locator": "func:get_query_text(__page__, '.table')",
"required_contents": {
"must_include": [
"o0_bobbo_0o",
"Hello"
]
}
}
]
},
"intent_template_id": 56,
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/pics/25046"
},
{
"sites": [
"reddit"
],
"task_id": 141,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/movies/63905",
"geolocation": null,
"intent_template": "Leave a comment in this post with the release date of the movie in the image in mm-dd-yyyy format.",
"intent": "Leave a comment in this post with the release date of the movie in the image in mm-dd-yyyy format.",
"image": null,
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/f/movies/63905",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"08-11-2023"
]
}
}
]
},
"intent_template_id": 57,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/movies/63905"
},
{
"sites": [
"reddit"
],
"task_id": 142,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/movies/86107",
"geolocation": null,
"intent_template": "Leave a comment in this post with the release date of the movie in the image in mm-dd-yyyy format.",
"intent": "Leave a comment in this post with the release date of the movie in the image in mm-dd-yyyy format.",
"image": null,
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/f/movies/86107",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"03-03-2023"
]
}
}
]
},
"intent_template_id": 57,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/movies/86107"
},
{
"sites": [
"reddit"
],
"task_id": 143,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/movies/128615",
"geolocation": null,
"intent_template": "Leave a comment in this post with the release date of the movie in the image in mm-dd-yyyy format.",
"intent": "Leave a comment in this post with the release date of the movie in the image in mm-dd-yyyy format.",
"image": null,
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/f/movies/128615",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"03-31-2023"
]
}
}
]
},
"intent_template_id": 57,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/movies/128615"
},
{
"sites": [
"reddit"
],
"task_id": 144,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/wallstreetbets/72041",
"geolocation": null,
"intent_template": "Leave a comment in this post with the text as the trading loss of the OP in dollars.",
"intent": "Leave a comment in this post with the text as the trading loss of the OP in dollars.",
"image": null,
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/f/wallstreetbets/72041",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"31146.66 |OR| 31,146.66"
]
}
}
]
},
"intent_template_id": 58,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/wallstreetbets/72041"
},
{
"sites": [
"reddit"
],
"task_id": 145,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/wallstreetbets/136297",
"geolocation": null,
"intent_template": "Leave a comment in this post with the text as the trading loss of the OP in euros.",
"intent": "Leave a comment in this post with the text as the trading loss of the OP in euros.",
"image": null,
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/f/wallstreetbets/136297",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"41624.56 |OR| 41,624.56"
]
}
}
]
},
"intent_template_id": 59,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/wallstreetbets/136297"
},
{
"sites": [
"reddit"
],
"task_id": 146,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/wallstreetbets/50335",
"geolocation": null,
"intent_template": "Leave a comment in this post with the text as the trading loss of the OP in dollars.",
"intent": "Leave a comment in this post with the text as the trading loss of the OP in dollars.",
"image": null,
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/f/wallstreetbets/50335",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"209783.15 |OR| 209,783.15"
]
}
}
]
},
"intent_template_id": 58,
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/wallstreetbets/50335"
},
{
"sites": [
"reddit"
],
"task_id": 147,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/food/18831",
"geolocation": null,
"intent_template": "Leave a comment in this post with the text as the number of {{item}} in the image.",
"intent": "Leave a comment in this post with the text as the number of buns in the image.",
"image": null,
"instantiation_dict": {
"item": "buns"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/f/food/18831",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"10 |OR| ten"
]
}
}
]
},
"intent_template_id": 60,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/food/18831"
},
{
"sites": [
"reddit"
],
"task_id": 148,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/food/60745",
"geolocation": null,
"intent_template": "Leave a comment in this post with the text as the number of {{item}} in the image.",
"intent": "Leave a comment in this post with the text as the number of arancinis in the image.",
"image": null,
"instantiation_dict": {
"item": "arancinis"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/f/food/60745",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"2 |OR| two"
]
}
}
]
},
"intent_template_id": 60,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/food/60745"
},
{
"sites": [
"reddit"
],
"task_id": 149,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/food/60641",
"geolocation": null,
"intent_template": "Leave a comment in this post with the text as the number of {{item}} in the image.",
"intent": "Leave a comment in this post with the text as the number of gyozas in the image.",
"image": null,
"instantiation_dict": {
"item": "gyozas"
},
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/f/food/60641",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"8 |OR| eight"
]
}
}
]
},
"intent_template_id": 60,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/food/60641"
},
{
"sites": [
"reddit"
],
"task_id": 150,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/OldSchoolCool/35826",
"geolocation": null,
"intent_template": "Leave a comment in this post with the text as the number of {{item}} in the image.",
"intent": "Leave a comment in this post with the text as the number of adults in the image.",
"instantiation_dict": {
"item": "adults"
},
"image": null,
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/f/OldSchoolCool/35826",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"2 |OR| two"
]
}
}
]
},
"intent_template_id": 60,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/OldSchoolCool/35826"
},
{
"sites": [
"reddit"
],
"task_id": 151,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/OldSchoolCool/78885",
"geolocation": null,
"intent_template": "Leave a comment in this post with the text as the number of {{item}} in the image.",
"intent": "Leave a comment in this post with the text as the number of adults in the image.",
"instantiation_dict": {
"item": "adults"
},
"image": null,
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/f/OldSchoolCool/78885",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"1 |OR| one"
]
}
}
]
},
"intent_template_id": 60,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/OldSchoolCool/78885"
},
{
"sites": [
"reddit"
],
"task_id": 152,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/OldSchoolCool/15059",
"geolocation": null,
"intent_template": "Leave a comment in this post with the text as the number of {{item}} in the image.",
"intent": "Leave a comment in this post with the text as the number of adults in the image.",
"instantiation_dict": {
"item": "adults"
},
"image": null,
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"program_html": [
{
"url": "__REDDIT__/f/OldSchoolCool/15059",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"must_include": [
"0 |OR| zero"
]
}
}
]
},
"intent_template_id": 60,
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "__REDDIT__/f/OldSchoolCool/15059"
},
{
"sites": [
"reddit"
],
"task_id": 153,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Is there a version of this post with the final results? If so, navigate to the comment section for it.",
"intent": "Is there a version of this post with the final results? If so, navigate to the comment section for it.",
"image": "__REDDIT__/submission_images/a1ee91708d8c09894668f30cf64c963d05993a9dcda1a3f38107868475b3abb2.png",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/dataisbeautiful/60153/oc-radial-bracket-fifa-world-cup-2022-final-updated",
"program_html": [],
"url_note": "EXACT"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "",
"intent_template_id": 61
},
{
"sites": [
"reddit"
],
"task_id": 154,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "{{intro}}. Can you tell me {{information}}?{{action}}",
"intent": "I think I saw a post in f/DataIsBeautiful that talked about the price of training ImageNet models. Can you tell me how much it cost in 2020 was to train a top-5 model with >93% accuracy? Finally, take me to the comments section of the post.",
"instantiation_dict": {
"intro": "I think I saw a post in f/DataIsBeautiful that talked about the price of training ImageNet models",
"information": "how much it cost in 2020 was to train a top-5 model with >93% accuracy",
"action": " Finally, take me to the comments section of the post."
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match",
"url_match"
],
"reference_answers": {
"must_include": [
"7.43"
]
},
"reference_url": "__REDDIT__/f/dataisbeautiful/103920",
"url_note": "GOLD in PRED",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "$7.43"
},
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/dataisbeautiful/103920/oc-the-cost-of-training-ai-on-imagenet-has-decreased-from",
"intent_template_id": 62
},
{
"sites": [
"reddit"
],
"task_id": 155,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/pics",
"geolocation": null,
"intent_template": "{{intro}}. Can you navigate to the comments section of a post {{description}}?",
"intent": "I saw a really funny movie last week (the image is the movie poster). Can you navigate to the comments section of a post that has the picture that was the inspiration for the IRS scene?",
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Everything_Everywhere_All_at_Once.jpg.webp",
"instantiation_dict": {
"intro": "I saw a really funny movie last week (the image is the movie poster)",
"description": "that has the picture that was the inspiration for the IRS scene"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/pics/131632",
"program_html": [],
"url_note": "GOLD in PRED"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "",
"intent_template_id": 63
},
{
"sites": [
"reddit"
],
"task_id": 156,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "{{intro}}. Can you tell me {{information}}?{{action}}",
"intent": "I saw a fun Mario and Princess Peach cosplay in f/gaming. Can you tell me the top comment for that post?",
"instantiation_dict": {
"intro": "I saw a fun Mario and Princess Peach cosplay in f/gaming",
"information": "the top comment for that post",
"action": ""
},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"Mama mia!"
]
},
"reference_url": "__REDDIT__/f/gaming/19625/finally-found-her-and-all-i-got-was-a-kiss-on-the-cheek",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "Mama mia!"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 62
},
{
"sites": [
"reddit"
],
"task_id": 157,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/Music",
"geolocation": null,
"intent_template": "Can you take me to the comments section of {{post}} {{condition}}?",
"intent": "Can you take me to the comments section of the post in this subreddit that talks about how the person who wrote this symphony died?",
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Eroica_Beethoven_title.jpg.webp",
"instantiation_dict": {
"post": "the post in this subreddit",
"condition": "that talks about how the person who wrote this symphony died"
},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/Music/121269/ludwig-von-beethoven-s-dna-reveals-he-probably-died-of-liver",
"program_html": [],
"url_note": "EXACT"
},
"reasoning_difficulty": "easy",
"visual_difficulty": "hard",
"overall_difficulty": "medium",
"comments": "Text is very hard to read, even for a human.",
"intent_template_id": 64
},
{
"sites": [
"reddit"
],
"task_id": 158,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you subscribe to all subreddits that start with the letter '{{letter}}' and have a {{object}} image in their top 3 posts of all time?",
"instantiation_dict": {
"letter": "a",
"object": "dog"
},
"intent": "Can you subscribe to all subreddits that start with the letter 'a' and have a dog image in their top 3 posts of all time?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__",
"locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")",
"required_contents": {
"must_include": [
"aww"
],
"must_exclude": [
"allentown",
"arlingtonva",
"Art",
"AskReddit",
"askscience"
]
}
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 65
},
{
"sites": [
"reddit"
],
"task_id": 159,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you subscribe to all subreddits that start with the letter '{{letter}}' and have a {{object}} image in their top 3 posts of all time?",
"instantiation_dict": {
"letter": "n",
"object": "downtown"
},
"intent": "Can you subscribe to all subreddits that start with the letter 'n' and have a downtown image in their top 3 posts of all time?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__",
"locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")",
"required_contents": {
"must_include": [
"Newark",
"nyc"
],
"must_exclude": [
"newhampshire",
"newhaven",
"newjersey",
"news",
"nosleep",
"nottheonion"
]
}
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 65
},
{
"sites": [
"reddit"
],
"task_id": 160,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you subscribe to all subreddits that start with the letter '{{letter}}' and have a {{object}} image in their top 3 posts of all time?",
"instantiation_dict": {
"letter": "i",
"object": "female usb to male lightning connector"
},
"intent": "Can you subscribe to all subreddits that start with the letter 'i' and have a female usb to male lightning connector image in their top 3 posts of all time?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__",
"locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")",
"required_contents": {
"must_exclude": [
"IAmA",
"InternetIsBeautiful",
"iphone"
]
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 65
},
{
"sites": [
"reddit"
],
"task_id": 161,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/Jokes",
"geolocation": null,
"intent_template": "Take me to the post for the joke on this page that is most likely to take place in the following image?",
"intent": "Take me to the post for the joke on this page that is most likely to take place in the following image?",
"image": "https://images.pexels.com/photos/4969841/pexels-photo-4969841.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/Jokes/55249/a-priest-and-a-nun-in-a-desert-cabin",
"program_html": [],
"url_note": "EXACT"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "The post misspells 'deserted' as 'desert', which makes it hard to find.",
"intent_template_id": 66
},
{
"sites": [
"reddit"
],
"task_id": 162,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you give me the URL for a gif I could send to my friend about the contrast of investing in a retirement account vs a brokerage account from the hot posts of /f/wallstreetbets?",
"intent": "Can you give me the URL for a gif I could send to my friend about the contrast of investing in a retirement account vs a brokerage account from the hot posts of /f/wallstreetbets?",
"instantiation_dict": {},
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"__REDDIT__/submission_images/73199932fc7c826edb8313f0993483b6982ea7d3ea368e8d904eab71cb642e2e.gif"
]
},
"reference_url": "",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "__REDDIT__/submission_images/73199932fc7c826edb8313f0993483b6982ea7d3ea368e8d904eab71cb642e2e.gif"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 67
},
{
"sites": [
"reddit",
"wikipedia"
],
"task_id": 163,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__ |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Up_(2009_film)",
"geolocation": null,
"intent_template": "What is the top comment for the most popular post of all time (across all forums) containing {{subject}} in the Wikipedia tab?",
"instantiation_dict": {
"subject": "a character from the movie"
},
"intent": "What is the top comment for the most popular post of all time (across all forums) containing a character from the movie in the Wikipedia tab?",
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"https://i.imgur.com/zmkfQ8F"
]
},
"reference_url": "",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "https://i.imgur.com/zmkfQ8F"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "Post is __REDDIT__/f/photoshopbattles/45340/psbattle-halloween-costume",
"intent_template_id": 68
},
{
"sites": [
"reddit",
"wikipedia"
],
"task_id": 164,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__ |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Digging",
"geolocation": null,
"intent_template": "What is the top comment for the most popular post of all time (across all forums) containing {{subject}} in the Wikipedia tab?",
"instantiation_dict": {
"subject": "a dog trying to perform the action"
},
"intent": "What is the top comment for the most popular post of all time (across all forums) containing a dog trying to perform the action in the Wikipedia tab?",
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"I\u2019ve found that Dachshunds are rarely accused of having small or light personalities."
]
},
"reference_url": "",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "I\u2019ve found that Dachshunds are rarely accused of having small or light personalities."
},
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "medium",
"comments": "Post is __REDDIT__/f/gifs/19936/ok-time-for-you-to-go-to-bed (hard visual because the agent needs to understand a moving gif)",
"intent_template_id": 68
},
{
"sites": [
"reddit"
],
"task_id": 165,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you find {{post}} and message the poster '{{message}}'?",
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Fallingwater3.jpg.webp",
"instantiation_dict": {
"post": "art of the building in the picture",
"message": "I really like this picture! Can I print it out and hang it on my wall?"
},
"intent": "Can you find art of the building in the picture and message the poster 'I really like this picture! Can I print it out and hang it on my wall?'?",
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/messages",
"locator": "func:get_query_text(__page__, \"#main > table > tbody\")",
"required_contents": {
"must_include": [
"put_the_ux_in_sux",
"I really like this picture! Can I print it out and hang it on my wall?"
]
}
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/pittsburgh/89846/fallingwater-by-lisa-frank-lloyd-wright",
"intent_template_id": 69
},
{
"sites": [
"reddit"
],
"task_id": 166,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you find {{post}} and message the poster '{{message}}'?",
"instantiation_dict": {
"post": "a post of the International Space Station's solar panels with the Earth in the background",
"message": "Was the sun super bright in space?"
},
"intent": "Can you find a post of the International Space Station's solar panels with the Earth in the background and message the poster 'Was the sun super bright in space?'?",
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/messages",
"locator": "func:get_query_text(__page__, \"#main > table > tbody\")",
"required_contents": {
"must_include": [
"astro_pettit |OR| Hazelil",
"Was the sun super bright in space?"
]
}
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "__REDDIT__/f/space/27376/a-view-i-captured-of-the-sun-between-iss-solar-panels-more |OR| __REDDIT__/f/space/92045/ireland-seen-from-the-international-space-station",
"intent_template_id": 69
},
{
"sites": [
"reddit"
],
"task_id": 167,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "{{intro}}. Can you navigate to the comments section of a post {{description}}?",
"instantiation_dict": {
"intro": "I am feeling sad today",
"description": "that has an image of a dog and a cat to cheer me up"
},
"intent": "I am feeling sad today. Can you navigate to the comments section of a post that has an image of a dog and a cat to cheer me up?",
"require_reset": false,
"eval": {
"eval_types": [
"page_image_query"
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__row",
"eval_vqa": [
{
"question": "Is there a cat in this photo? (yes/no)",
"answer": "yes"
},
{
"question": "Is there a dog in this photo? (yes/no)",
"answer": "yes"
}
]
}
],
"reference_url": "",
"program_html": [],
"string_note": ""
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "An example is at __REDDIT__/f/aww/81028/cute-buddies",
"intent_template_id": 63
},
{
"sites": [
"reddit"
],
"task_id": 168,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/pics?next%5Branking%5D=1773&next%5Bid%5D=67091",
"geolocation": null,
"intent_template": "Can you repost the image on this page that would fit best in {{subreddit}} with the same title and text as the original post and take me to the comments section of the post?",
"instantiation_dict": {
"subreddit": "f/EarthPorn"
},
"intent": "Can you repost the image on this page that would fit best in f/EarthPorn with the same title and text as the original post and take me to the comments section of the post?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html",
"page_image_query"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "last",
"locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")",
"required_contents": {
"must_include": [
"Aurora Borealis looking in the Direction to Svalbard flying from Lahore to Toronto (my pic) \ud83c\udf42\ud83d\udcab\u2728",
"MarvelsGrantMan136",
"EarthPorn"
]
}
}
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_fuzzy_image_match": "__REDDIT__/submission_images/1972dfbb29553ecc270ae83ab679d218f425d478136f1f524a09047e299a2cef.jpg"
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 70
},
{
"sites": [
"reddit"
],
"task_id": 169,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/pics/top?t=all&next%5BnetScore%5D=589&next%5Bid%5D=67196",
"geolocation": null,
"intent_template": "Can you repost the image on this page that would fit best in {{subreddit}} with the same title and text as the original post and take me to the comments section of the post?",
"instantiation_dict": {
"subreddit": "f/sports"
},
"intent": "Can you repost the image on this page that would fit best in f/sports with the same title and text as the original post and take me to the comments section of the post?",
"require_reset": false,
"eval": {
"eval_types": [
"program_html",
"page_image_query"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "last",
"locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")",
"required_contents": {
"must_include": [
"Pele, Ali, Jordan, Montana, Brown, Kareem, Evert, King, Nicklaus, Russell, Elway and Holyfield",
"MarvelsGrantMan136",
"sports"
]
}
}
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_fuzzy_image_match": "__REDDIT__/submission_images/539a1a709c696de5b7c3bb8d8df51fd2b5cff90b91de30da1e6e13f88b6d71a6.jpg"
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 70
},
{
"sites": [
"reddit"
],
"task_id": 170,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you subscribe to the forum {{condition}}?",
"image": "https://images.pexels.com/photos/1884574/pexels-photo-1884574.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1",
"instantiation_dict": {
"condition": "that is most likely to contain posts like this"
},
"intent": "Can you subscribe to the forum that is most likely to contain posts like this?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__",
"locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")",
"required_contents": {
"must_include": [
"sports"
]
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 71
},
{
"sites": [
"reddit"
],
"task_id": 171,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you subscribe to the forum {{condition}}?",
"image": "https://images.pexels.com/photos/2115257/pexels-photo-2115257.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1",
"instantiation_dict": {
"condition": "that is most likely to contain posts like this"
},
"intent": "Can you subscribe to the forum that is most likely to contain posts like this?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__",
"locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")",
"required_contents": {
"must_include": [
"MechanicalKeyboards"
]
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 71
},
{
"sites": [
"reddit"
],
"task_id": 172,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/jerseycity",
"geolocation": null,
"intent_template": "Can you post a comment on the post {{description}} with the text '{{comment}}'?",
"instantiation_dict": {
"description": "that has a crispy chicken sandwich",
"comment": "I love those things!"
},
"intent": "Can you post a comment on the post that has a crispy chicken sandwich with the text 'I love those things!'?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/jerseycity/62526/because-all-we-have-in-life-is-our-word-and-fake-internet",
"locator": "func:get_query_text(__page__, \"#main\")",
"required_contents": {
"must_include": [
"MarvelsGrantMan136",
"I love those things!"
]
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "medium",
"comments": "Occluded text is hard to read.",
"intent_template_id": 72
},
{
"sites": [
"reddit"
],
"task_id": 173,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/memes/top?t=all",
"geolocation": null,
"intent_template": "Can you post a comment on the post {{description}} with the text '{{comment}}'?",
"instantiation_dict": {
"description": "with an \ud83d\udc7d in it",
"comment": "Scary!"
},
"intent": "Can you post a comment on the post with an \ud83d\udc7d in it with the text 'Scary!'?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/memes/21032/at-lease-is-not-brazil",
"locator": "func:get_query_text(__page__, \"#main\")",
"required_contents": {
"must_include": [
"MarvelsGrantMan136",
"Scary!"
]
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 72
},
{
"sites": [
"reddit"
],
"task_id": 174,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/gifs",
"geolocation": null,
"intent_template": "Can you post a comment on the post {{description}} with the text '{{comment}}'?",
"instantiation_dict": {
"description": "with a \ud83d\udef9 in it",
"comment": "\ud83c\udfce\ufe0f\ud83c\udfce\ufe0f\ud83c\udfce\ufe0f"
},
"intent": "Can you post a comment on the post with a \ud83d\udef9 in it with the text '\ud83c\udfce\ufe0f\ud83c\udfce\ufe0f\ud83c\udfce\ufe0f'?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/gifs/19938/what-goes-around-comes-around",
"locator": "func:get_query_text(__page__, \"#main\")",
"required_contents": {
"must_include": [
"MarvelsGrantMan136",
"\ud83c\udfce\ufe0f\ud83c\udfce\ufe0f\ud83c\udfce\ufe0f"
]
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "medium",
"comments": "Needs to see animated gif to complete task.",
"intent_template_id": 72
},
{
"sites": [
"reddit"
],
"task_id": 175,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "How much money was user {{user}}'s {{security}} worth {{timeframe}} in their screenshot (in {{currency}})?",
"instantiation_dict": {
"user": "cezapiza",
"security": "SPY puts",
"timeframe": "the day before",
"currency": "USD"
},
"intent": "How much money was user cezapiza's SPY puts worth the day before in their screenshot (in USD)?",
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"360"
]
},
"reference_url": "",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "$360"
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "Post is __REDDIT__/f/wallstreetbets/115267/up-60-on-my-spy-puts-today",
"intent_template_id": 73
},
{
"sites": [
"reddit"
],
"task_id": 176,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/food",
"geolocation": null,
"intent_template": "Can you comment on the food from the country in the image and about {{description}} saying '{{comment}}'?",
"instantiation_dict": {
"description": "\ud83c\udf5c",
"comment": "Sooo tasty!"
},
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Extreme_points_japan_map.png.webp",
"intent": "Can you comment on the food from the country in the image and about \ud83c\udf5c saying 'Sooo tasty!'?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/food/104562/i-ate-tonkotsu-ramen",
"locator": "func:get_query_text(__page__, \"#main\")",
"required_contents": {
"must_include": [
"MarvelsGrantMan136",
"Sooo tasty!"
]
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "",
"intent_template_id": 74
},
{
"sites": [
"reddit"
],
"task_id": 177,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/food?next%5Branking%5D=1727&next%5Bid%5D=82778",
"geolocation": null,
"intent_template": "Can you comment on the food from the country in the image and about {{description}} saying '{{comment}}'?",
"instantiation_dict": {
"description": "a dog in the background",
"comment": "Someone is looking for a bite \ud83d\udc36"
},
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/South_Korea_(orthographic_projection).svg.png.webp",
"intent": "Can you comment on the food from the country in the image and about a dog in the background saying 'Someone is looking for a bite \ud83d\udc36'?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/food/82838/tteukboki-homemade",
"locator": "func:get_query_text(__page__, \"#main\")",
"required_contents": {
"must_include": [
"MarvelsGrantMan136",
"Someone is looking for a bite \ud83d\udc36"
]
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 74
},
{
"sites": [
"reddit"
],
"task_id": 178,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you subscribe to the forum {{condition}}?",
"image": "https://images.pexels.com/photos/7435463/pexels-photo-7435463.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1",
"instantiation_dict": {
"condition": "for this city"
},
"intent": "Can you subscribe to the forum for this city?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__",
"locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")",
"required_contents": {
"must_include": [
"nyc"
]
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "The image is of the 'Vessel' in New York City.",
"intent_template_id": 71
},
{
"sites": [
"reddit"
],
"task_id": 179,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you take me to {{sorting}} in the forum for {{location}}?",
"instantiation_dict": {
"sorting": "the page that shows the most controversial posts of the past month",
"location": "a city located in the state pictured in the image"
},
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Map_of_USA_MO.svg.png.webp",
"intent": "Can you take me to the page that shows the most controversial posts of the past month in the forum for a city located in the state pictured in the image?",
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/springfieldMO/controversial?t=month",
"program_html": [],
"url_note": "EXACT"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 75
},
{
"sites": [
"reddit"
],
"task_id": 180,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you take me to {{sorting}} in the forum for {{location}}?",
"instantiation_dict": {
"sorting": "the newest posts",
"location": "the highlighted county in the image"
},
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Map_of_Virginia_highlighting_Arlington_County.svg.png.webp",
"intent": "Can you take me to the newest posts in the forum for the highlighted county in the image?",
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/arlingtonva/new",
"program_html": [],
"url_note": "EXACT"
},
"reasoning_difficulty": "easy",
"visual_difficulty": "hard",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 75
},
{
"sites": [
"reddit"
],
"task_id": 181,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "I am a student at the school in the image. Can you make a post asking for the best {{location}} in the forum for the city my school is in and navigate to it?",
"instantiation_dict": {
"location": "pizza"
},
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Yale_family_chrest.jpg.webp",
"intent": "I am a student at the school in the image. Can you make a post asking for the best pizza in the forum for the city my school is in and navigate to it?",
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "last",
"locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")",
"required_contents": {
"must_include": [
"pizza",
"newhaven",
"MarvelsGrantMan136"
]
}
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "The text in the image looks like 'Vale' but the agent needs to know it's Yale.",
"intent_template_id": 76
},
{
"sites": [
"reddit"
],
"task_id": 182,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "I am a student at the school in the image. Can you make a post asking for the best {{location}} in the forum for the city my school is in and navigate to it?",
"instantiation_dict": {
"location": "Indian food"
},
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Stanstadium_view.jpg.webp",
"intent": "I am a student at the school in the image. Can you make a post asking for the best Indian food in the forum for the city my school is in and navigate to it?",
"require_reset": true,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"fuzzy_match": "N/A"
},
"string_note": "There is no forum for Palo Alto, where Stanford is located.",
"reference_url": ""
},
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "Hard to read text off the football field and has to recognize that StamfordCT is not where Stanford is located. Also there is no forum for Palo Alto.",
"intent_template_id": 76
},
{
"sites": [
"reddit",
"shopping"
],
"task_id": 183,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__ |AND| __SHOPPING__/grocery-gourmet-food/fresh-meal-kits.html?p=5&product_list_order=name",
"geolocation": null,
"intent_template": "Can you make a post in f/food, title it '{{title}}', add the full sized version of the image with {{food}}, and navigate to it?",
"instantiation_dict": {
"title": "[I ate] this",
"food": "the pop tart on this page of OneStopMarket"
},
"intent": "Can you make a post in f/food, title it '[I ate] this', add the full sized version of the image with the pop tart on this page of OneStopMarket, and navigate to it?",
"require_reset": false,
"eval": {
"eval_types": [
"program_html",
"page_image_query"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "last",
"locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")",
"required_contents": {
"must_include": [
"[I ate] this",
"MarvelsGrantMan136",
"food"
]
}
}
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B08JQRPJBR.0.jpg"
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "",
"intent_template_id": 77
},
{
"sites": [
"reddit",
"shopping"
],
"task_id": 184,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__ |AND| __SHOPPING__/grocery-gourmet-food/dairy-cheese-eggs/cheese.html?p=4&product_list_order=name",
"geolocation": null,
"intent_template": "Can you make a post in f/food, title it '{{title}}', add the full sized version of the image with {{food}}, and navigate to it?",
"instantiation_dict": {
"title": "[HOMEMADE] this yummy cheese",
"food": "the semicircle cheese"
},
"intent": "Can you make a post in f/food, title it '[HOMEMADE] this yummy cheese', add the full sized version of the image with the semicircle cheese, and navigate to it?",
"require_reset": false,
"eval": {
"eval_types": [
"program_html",
"page_image_query"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "last",
"locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")",
"required_contents": {
"must_include": [
"[HOMEMADE] this yummy cheese",
"MarvelsGrantMan136",
"food"
]
}
}
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B009P9HODS.0.jpg |OR| __SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B009P9HODS.1.jpg"
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 77
},
{
"sites": [
"reddit",
"shopping",
"wikipedia"
],
"task_id": 185,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__ |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Pok\u00e9mon |AND| __SHOPPING__/video-games/legacy-systems/playstation-systems.html?p=3",
"geolocation": null,
"intent_template": "Can you make a post in {{forum}} titled '{{title}}', add the full sized version of the image on the shopping page most related to the wikipedia page? Finally, navigate to the comments section of it.",
"instantiation_dict": {
"forum": "the /f/gaming forum",
"title": "Should I buy this? I can get a 25% discount"
},
"intent": "Can you make a post in the /f/gaming forum titled 'Should I buy this? I can get a 25% discount', add the full sized version of the image on the shopping page most related to the wikipedia page? Finally, navigate to the comments section of it.",
"require_reset": false,
"eval": {
"eval_types": [
"program_html",
"page_image_query"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "last",
"locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")",
"required_contents": {
"must_include": [
"Should I buy this? I can get a 25% discount",
"MarvelsGrantMan136",
"gaming"
]
}
}
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B07CMWR38H.0.jpg"
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "",
"intent_template_id": 78
},
{
"sites": [
"reddit",
"shopping",
"wikipedia"
],
"task_id": 186,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__ |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Rudolph_the_Red-Nosed_Reindeer%3A_The_Movie |AND| __SHOPPING__/sports-outdoors/sports.html?p=5",
"geolocation": null,
"intent_template": "Can you make a post in {{forum}} titled '{{title}}', add the full sized version of the image on the shopping page most related to the wikipedia page? Finally, navigate to the comments section of it.",
"instantiation_dict": {
"forum": "the /f/aww forum",
"title": "Perfect for the winter holidays!"
},
"intent": "Can you make a post in the /f/aww forum titled 'Perfect for the winter holidays!', add the full sized version of the image on the shopping page most related to the wikipedia page? Finally, navigate to the comments section of it.",
"require_reset": false,
"eval": {
"eval_types": [
"program_html",
"page_image_query"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "last",
"locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")",
"required_contents": {
"must_include": [
"Perfect for the winter holidays!",
"MarvelsGrantMan136",
"aww"
]
}
}
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09KVCRK1S.0.jpg |OR| __SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09KVCRK1S.1.jpg"
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "",
"intent_template_id": 78
},
{
"sites": [
"reddit",
"shopping",
"wikipedia"
],
"task_id": 187,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__ |AND| __WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/A/Yoda |AND| __SHOPPING__/home-kitchen/bedding/kids-bedding.html?product_list_order=name&product_list_dir=desc",
"geolocation": null,
"intent_template": "Can you make a post in {{forum}} titled '{{title}}', add the full sized version of the image on the shopping page most related to the wikipedia page? Finally, navigate to the comments section of it.",
"instantiation_dict": {
"forum": "the forum for the capital of Rhode Island (not the one for the state)",
"title": "Perfect for the cold weather"
},
"intent": "Can you make a post in the forum for the capital of Rhode Island (not the one for the state) titled 'Perfect for the cold weather', add the full sized version of the image on the shopping page most related to the wikipedia page? Finally, navigate to the comments section of it.",
"require_reset": false,
"eval": {
"eval_types": [
"program_html",
"page_image_query"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "last",
"locator": "func:get_query_text(__page__, \"#main > article > div > div.submission__inner > header\")",
"required_contents": {
"must_include": [
"Perfect for the cold weather",
"MarvelsGrantMan136",
"providence"
]
}
}
],
"page_image_query": [
{
"eval_image_url": "last",
"eval_image_class": ".submission__image",
"eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09LQ95N1N.0.jpg |OR| __SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09LQ95N1N.1.jpg"
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "",
"intent_template_id": 78
},
{
"sites": [
"reddit"
],
"task_id": 188,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you subscribe me to the most suitable forum where I can find more posts like in the image?",
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Apple_iPhone_12_Pro_-_Cameras_(50535314721).jpg.webp",
"instantiation_dict": {},
"intent": "Can you subscribe me to the most suitable forum where I can find more posts like in the image?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__",
"locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")",
"required_contents": {
"must_include": [
"iphone |OR| technology"
]
}
}
]
},
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "",
"intent_template_id": 79
},
{
"sites": [
"reddit"
],
"task_id": 189,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you subscribe me to the most suitable forum where I can find more posts like in the image?",
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Typical_cnn.png.webp",
"instantiation_dict": {},
"intent": "Can you subscribe me to the most suitable forum where I can find more posts like in the image?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__",
"locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")",
"required_contents": {
"must_include": [
"MachineLearning |OR| deeplearning |OR| singularity"
]
}
}
]
},
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "",
"intent_template_id": 79
},
{
"sites": [
"reddit"
],
"task_id": 190,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you subscribe me to the most suitable forum where I can find more posts like in the image?",
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Pieter_Aertsen_-_Market_Scene_-_Google_Art_ProjectFXD.jpg.webp",
"instantiation_dict": {},
"intent": "Can you subscribe me to the most suitable forum where I can find more posts like in the image?",
"require_reset": true,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__",
"locator": "func:get_query_text(__page__, \"#sidebar > section > ul\")",
"required_contents": {
"must_include": [
"Art"
]
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "",
"intent_template_id": 79
},
{
"sites": [
"reddit"
],
"task_id": 191,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/iphone/top?t=all",
"geolocation": null,
"intent_template": "What is the sum of the number of upvotes for the posts on this page that have {{item}}?",
"instantiation_dict": {
"item": "any part of a physical phone"
},
"intent": "What is the sum of the number of upvotes for the posts on this page that have any part of a physical phone?",
"require_reset": true,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"3463 |OR| 3,463"
]
},
"reference_url": "",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "3463"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "medium",
"comments": "1) __REDDIT__/f/iphone/106148/apple-really-should-bring-back-the-rose-gold-color-option-it 2) __REDDIT__/f/iphone/41010/strong-magnets 3) __REDDIT__/f/iphone/62333/this-mesh-thing-fell-out-of-my-iphone-13-pro-is-it-a-big",
"intent_template_id": 80
},
{
"sites": [
"reddit"
],
"task_id": 192,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/aww/most_commented?t=all",
"geolocation": null,
"intent_template": "What is the sum of the number of upvotes for the posts on this page that have {{item}}?",
"instantiation_dict": {
"item": "three animals"
},
"intent": "What is the sum of the number of upvotes for the posts on this page that have three animals?",
"require_reset": true,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"3566 |OR| 3,566"
]
},
"reference_url": "",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "3566"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "1) __REDDIT__/f/aww/80942/oc-what-do-you-suspect-they-want 2) __REDDIT__/f/aww/102762/they-think-the-couch-is-theirs-oc",
"intent_template_id": 80
},
{
"sites": [
"reddit"
],
"task_id": 193,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/EarthPorn/top?t=all",
"geolocation": null,
"intent_template": "Can you comment '{{comment}}' on the post with {{item}} posted {{timeframe}}?",
"instantiation_dict": {
"comment": "I love this!",
"item": "a blue/turquoise aurora",
"timeframe": "in the fall"
},
"intent": "Can you comment 'I love this!' on the post with a blue/turquoise aurora posted in the fall?",
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/EarthPorn/12673/image-of-northern-lights-i-shot-in-senja-norway-2-weeks-ag",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "I love this!"
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 81
},
{
"sites": [
"reddit"
],
"task_id": 194,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/MechanicalKeyboards/most_commented?t=all",
"geolocation": null,
"intent_template": "Can you comment '{{comment}}' on the post with {{item}} posted {{timeframe}}?",
"instantiation_dict": {
"comment": "This looks really fun to type on!",
"item": "a single keyboard with a number pad",
"timeframe": "on Halloween (in UTC)"
},
"intent": "Can you comment 'This looks really fun to type on!' on the post with a single keyboard with a number pad posted on Halloween (in UTC)?",
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/MechanicalKeyboards/14466/any-thoughts-on-layout-idea",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "This looks really fun to type on!"
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "medium",
"comments": "Has to look at a lot of images, and some have unexpected layouts.",
"intent_template_id": 81
},
{
"sites": [
"reddit"
],
"task_id": 195,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/MechanicalKeyboards/top?t=all&next%5BnetScore%5D=2415&next%5Bid%5D=78388",
"geolocation": null,
"intent_template": "Can you comment '{{comment}}' on the post with {{item}} posted {{timeframe}}?",
"instantiation_dict": {
"comment": "Wish I had this for my setup",
"item": "a pink escape key",
"timeframe": "after 8pm UTC"
},
"intent": "Can you comment 'Wish I had this for my setup' on the post with a pink escape key posted after 8pm UTC?",
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/MechanicalKeyboards/120858/literally-found-this-guy-in-a-giant-ewaste-pile-at-my",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "Wish I had this for my setup"
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 81
},
{
"sites": [
"reddit"
],
"task_id": 196,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you take me to the comments section of {{post}} {{condition}}?",
"instantiation_dict": {
"post": "the hottest post (of all time) in the art forum",
"condition": "where the post is only in black and white"
},
"intent": "Can you take me to the comments section of the hottest post (of all time) in the art forum where the post is only in black and white?",
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/Art/30842/moods-of-maizie-by-me-pen-2022",
"program_html": [],
"url_note": "EXACT"
},
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "",
"intent_template_id": 64
},
{
"sites": [
"reddit"
],
"task_id": 197,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you take me to the comments section of {{post}} {{condition}}?",
"instantiation_dict": {
"post": "the most commented post (of all time) in f/food",
"condition": "where the food is vegetarian"
},
"intent": "Can you take me to the comments section of the most commented post (of all time) in f/food where the food is vegetarian?",
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/food/18823/i-ate-maple-pecan-croissant",
"program_html": [],
"url_note": "EXACT"
},
"reasoning_difficulty": "medium",
"visual_difficulty": "easy",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 64
},
{
"sites": [
"reddit"
],
"task_id": 198,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you comment '{{comment}}' on the top post (of all time) in {{forum}} that relates to the image?",
"instantiation_dict": {
"comment": "\ud80c\udc20\ud80c\udc20",
"forum": "f/history"
},
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/M\u00e5rten_Eskil_Winge_-_Tor's_Fight_with_the_Giants_-_Google_Art_Project.jpg.webp",
"intent": "Can you comment '\ud80c\udc20\ud80c\udc20' on the top post (of all time) in f/history that relates to the image?",
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/history/20281/thor-s-hammer-amulet-discovered-in-sweden-arkeonews",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "\ud80c\udc20\ud80c\udc20"
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "medium",
"overall_difficulty": "medium",
"comments": "",
"intent_template_id": 82
},
{
"sites": [
"reddit"
],
"task_id": 199,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__",
"geolocation": null,
"intent_template": "Can you comment '{{comment}}' on the top post (of all time) in {{forum}} that relates to the image?",
"instantiation_dict": {
"comment": "I remember seeing him in person, one of the best nights of my life.",
"forum": "f/music"
},
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Michael_Jackson_first_moonwalk_1983.gif",
"intent": "Can you comment 'I remember seeing him in person, one of the best nights of my life.' on the top post (of all time) in f/music that relates to the image?",
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/f/Music/35453/michael-jackson-s-thriller-returns-to-billboard-top-10-after",
"locator": "func:reddit_get_latest_comment_content_by_username(__page__, 'MarvelsGrantMan136')",
"required_contents": {
"exact_match": "I remember seeing him in person, one of the best nights of my life."
}
}
]
},
"reasoning_difficulty": "medium",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "",
"intent_template_id": 82
},
{
"sites": [
"reddit"
],
"task_id": 200,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/explainlikeimfive",
"geolocation": null,
"intent_template": "Can you take me to the post on this page that is most related to the picture on this page?",
"instantiation_dict": {},
"image": "https://images.pexels.com/photos/8599635/pexels-photo-8599635.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2",
"intent": "Can you take me to the post on this page that is most related to the picture on this page?",
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/explainlikeimfive/125106/eli5-why-are-pickles-not-just-called-pickled-cucumbers",
"program_html": [],
"url_note": "EXACT"
},
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "",
"intent_template_id": 83
},
{
"sites": [
"reddit"
],
"task_id": 201,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/explainlikeimfive",
"geolocation": null,
"intent_template": "Can you take me to the post on this page that is most related to the picture on this page?",
"instantiation_dict": {},
"image": "https://images.pexels.com/photos/4045606/pexels-photo-4045606.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2",
"intent": "Can you take me to the post on this page that is most related to the picture on this page?",
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/explainlikeimfive/60436/eli5-why-does-putting-one-foot-out-from-under-the-blankets",
"program_html": [],
"url_note": "EXACT"
},
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "",
"intent_template_id": 83
},
{
"sites": [
"reddit"
],
"task_id": 202,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/Art?next%5Branking%5D=10246&next%5Bid%5D=51658",
"geolocation": null,
"intent_template": "Can you message the users who posted {{condition}} on this page with '{{message}}'?",
"instantiation_dict": {
"condition": "art with an animal (including stuffed animals)",
"message": "Animals are so cute, and I really like your work!"
},
"intent": "Can you message the users who posted art with an animal (including stuffed animals) on this page with 'Animals are so cute, and I really like your work!'?",
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/messages",
"locator": "func:get_query_text(__page__, \"#main > table > tbody\")",
"required_contents": {
"must_include": [
"iwouldtradetheworld",
"10seas",
"cryptowallet77",
"sarcasticsnowleopard",
"kookaburra_kool",
"Psycho_Ravager",
"houselegs",
"sasquatchinheat",
"Horsenart95",
"ika_simpson",
"VedaDragon",
"Animals are so cute, and I really like your work!"
]
}
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "Needs to message lots of different people and some of the animals are quite hard to spot.",
"intent_template_id": 84
},
{
"sites": [
"reddit"
],
"task_id": 203,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/gaming?next%5Branking%5D=712&next%5Bid%5D=83741",
"geolocation": null,
"intent_template": "Can you message the users who posted {{condition}} on this page with '{{message}}'?",
"instantiation_dict": {
"condition": "pictures related to Nintendo games",
"message": "Mama mia!!"
},
"intent": "Can you message the users who posted pictures related to Nintendo games on this page with 'Mama mia!!'?",
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/messages",
"locator": "func:get_query_text(__page__, \"#main > table > tbody\")",
"required_contents": {
"must_include": [
"evolve_into_wizards",
"afieldbleed47",
"devilmaycry10092",
"peen_was",
"Ghimzzo",
"ENateFak",
"dorgobar",
"Mama mia!!"
]
}
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "hard",
"overall_difficulty": "hard",
"comments": "The evaluation does not check three ambiguous posts: (1) __REDDIT__/f/gaming/61510/i-was-cleaning-my-basement-and-found-some-games-i-saved-from, (2) __REDDIT__/f/gaming/105197/updated-game-room, and (3) __REDDIT__/f/gaming/126167/welp",
"intent_template_id": 84
},
{
"sites": [
"reddit"
],
"task_id": 204,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/space",
"geolocation": null,
"intent_template": "Can you take me to the comments section of {{post}} {{condition}}?",
"instantiation_dict": {
"post": "the last post on this page",
"condition": "that has a picture of a person"
},
"intent": "Can you take me to the comments section of the last post on this page that has a picture of a person?",
"require_reset": false,
"eval": {
"eval_types": [
"url_match"
],
"reference_answers": null,
"reference_url": "__REDDIT__/f/space/134033/3-years-ago-i-built-this-telescope-in-my-parents-garage-it-s",
"program_html": [],
"url_note": "EXACT"
},
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "",
"intent_template_id": 64
},
{
"sites": [
"reddit"
],
"task_id": 205,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/space?next%5Branking%5D=1321&next%5Bid%5D=27417",
"geolocation": null,
"intent_template": "What is the top comment on the post about {{condition}}?",
"instantiation_dict": {
"condition": "the company founded by the guy in the image"
},
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/2019-06-11-tesla-shareholder-meeting-elon-musk.jpg.webp",
"intent": "What is the top comment on the post about the company founded by the guy in the image?",
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"Sounds to me like Amazon has too many fingers in too many pies."
]
},
"reference_url": "",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "Sounds to me like Amazon has too many fingers in too many pies."
},
"reasoning_difficulty": "easy",
"visual_difficulty": "medium",
"overall_difficulty": "easy",
"comments": "Guy in the picture is Elon Musk and he founded SpaceX.",
"intent_template_id": 85
},
{
"sites": [
"reddit"
],
"task_id": 206,
"require_login": false,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__/f/EarthPorn/most_commented?t=all&next%5BcommentCount%5D=89&next%5Bid%5D=76289",
"geolocation": null,
"intent_template": "What is the top comment on the post about {{condition}}?",
"instantiation_dict": {
"condition": "the thing that looks like the image"
},
"image": "__WIKIPEDIA__/wikipedia_en_all_maxi_2022-05/I/Crystal_skull_british_museum_random9834672.jpg.webp",
"intent": "What is the top comment on the post about the thing that looks like the image?",
"require_reset": false,
"eval": {
"eval_types": [
"string_match"
],
"reference_answers": {
"must_include": [
"Why not Zoidberg?"
]
},
"reference_url": "",
"program_html": [],
"string_note": "",
"reference_answer_raw_annotation": "Why not Zoidberg?"
},
"reasoning_difficulty": "easy",
"visual_difficulty": "easy",
"overall_difficulty": "easy",
"comments": "",
"intent_template_id": 85
},
{
"sites": [
"reddit",
"shopping"
],
"task_id": 207,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__ |AND| __SHOPPING__/sony-wh-1000xm4-wireless-noise-canceling-over-ear-headphones-black-with-sony-wla-ns7-wireless-tv-adapter-bundle-2-items.html |AND| __SHOPPING__/bang-olufsen-beoplay-hx-comfortable-wireless-anc-over-ear-headphones-timber.html",
"geolocation": null,
"intent_template": "Can you first make a post in {{forum1}} (then afterwards in {{forum2}}) asking for a comparison between the two {{item}} in the shopping tabs? Make sure to include both their SKUs in the title and a picture of the {{condition}}.",
"instantiation_dict": {
"forum1": "f/headphones",
"forum2": "f/gaming",
"item": "headphones",
"condition": "darker one"
},
"intent": "Can you first make a post in f/headphones (then afterwards in f/gaming) asking for a comparison between the two headphones in the shopping tabs? Make sure to include both their SKUs in the title and a picture of the darker one.",
"require_reset": true,
"eval": {
"eval_types": [
"program_html",
"page_image_query"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/user/MarvelsGrantMan136",
"locator": "func:get_query_text(__page__, \"#main > article:nth-child(4)\")",
"required_contents": {
"must_include": [
"headphones",
"B09M79MQYD",
"B08YJ3W1HB"
]
}
},
{
"url": "__REDDIT__/user/MarvelsGrantMan136",
"locator": "func:get_query_text(__page__, \"#main > article:nth-child(3)\")",
"required_contents": {
"must_include": [
"gaming",
"B09M79MQYD",
"B08YJ3W1HB"
]
}
}
],
"page_image_query": [
{
"eval_image_url": "__REDDIT__/user/MarvelsGrantMan136",
"eval_image_class": ".submission__row",
"eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09M79MQYD.0.jpg |OR| __SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09M79MQYD.1.jpg",
"ssim_threshold": 0.6
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "easy",
"overall_difficulty": "hard",
"comments": "",
"intent_template_id": 86
},
{
"sites": [
"reddit",
"shopping"
],
"task_id": 208,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__ |AND| __SHOPPING__/rca-43-4k-ultra-hd-2160p-hdr-roku-smart-led-tv-rtru4327-us.html |AND| __SHOPPING__/pioneer-43-inch-class-led-4k-uhd-smart-fire-tv-pn43951-22u-2021-model.html",
"geolocation": null,
"intent_template": "Can you first make a post in {{forum1}} (then afterwards in {{forum2}}) asking for a comparison between the two {{item}} in the shopping tabs? Make sure to include both their SKUs in the title and a picture of the {{condition}}.",
"instantiation_dict": {
"forum1": "f/monitor",
"forum2": "f/gadgets",
"item": "monitors",
"condition": "monitor with the movie titles"
},
"intent": "Can you first make a post in f/monitor (then afterwards in f/gadgets) asking for a comparison between the two monitors in the shopping tabs? Make sure to include both their SKUs in the title and a picture of the monitor with the movie titles.",
"require_reset": true,
"eval": {
"eval_types": [
"program_html",
"page_image_query"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/user/MarvelsGrantMan136",
"locator": "func:get_query_text(__page__, \"#main > article:nth-child(4)\")",
"required_contents": {
"must_include": [
"monitor",
"B09F3TW5CP",
"B09SBS75XB"
]
}
},
{
"url": "__REDDIT__/user/MarvelsGrantMan136",
"locator": "func:get_query_text(__page__, \"#main > article:nth-child(3)\")",
"required_contents": {
"must_include": [
"gadgets",
"B09F3TW5CP",
"B09SBS75XB"
]
}
}
],
"page_image_query": [
{
"eval_image_url": "__REDDIT__/user/MarvelsGrantMan136",
"eval_image_class": ".submission__row",
"eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09F3TW5CP.0.jpg |OR| __SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B09F3TW5CP.1.jpg",
"ssim_threshold": 0.6
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "",
"intent_template_id": 86
},
{
"sites": [
"reddit",
"shopping"
],
"task_id": 209,
"require_login": true,
"storage_state": "./.auth/reddit_state.json",
"start_url": "__REDDIT__ |AND| __SHOPPING__/ultra-game-nfl-men-s-full-zip-soft-fleece-hoodie-letterman-varsity-jacket.html |AND| __SHOPPING__/ultra-game-nfl-men-s-active-long-sleeve-tee-shirt.html",
"geolocation": null,
"intent_template": "Can you first make a post in {{forum1}} (then afterwards in {{forum2}}) asking for a comparison between the two {{item}} in the shopping tabs? Make sure to include both their SKUs in the title and a picture of the {{condition}}.",
"instantiation_dict": {
"forum1": "f/pittsburgh",
"forum2": "f/Pennsylvania",
"item": "products",
"condition": "front of the hoodie"
},
"intent": "Can you first make a post in f/pittsburgh (then afterwards in f/Pennsylvania) asking for a comparison between the two products in the shopping tabs? Make sure to include both their SKUs in the title and a picture of the front of the hoodie.",
"require_reset": true,
"eval": {
"eval_types": [
"program_html",
"page_image_query"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "__REDDIT__/user/MarvelsGrantMan136",
"locator": "func:get_query_text(__page__, \"#main > article:nth-child(4)\")",
"required_contents": {
"must_include": [
"pittsburgh",
"B01MQK1BQP",
"B07BCKK3HS"
]
}
},
{
"url": "__REDDIT__/user/MarvelsGrantMan136",
"locator": "func:get_query_text(__page__, \"#main > article:nth-child(3)\")",
"required_contents": {
"must_include": [
"Pennsylvania",
"B01MQK1BQP",
"B07BCKK3HS"
]
}
}
],
"page_image_query": [
{
"eval_image_url": "__REDDIT__/user/MarvelsGrantMan136",
"eval_image_class": ".submission__row",
"eval_fuzzy_image_match": "__SHOPPING__/media/catalog/product/cache/829a59e57f886f8cf0598ffca4f8a940/B/0/B01MQK1BQP.0.jpg",
"ssim_threshold": 0.6
}
]
},
"reasoning_difficulty": "hard",
"visual_difficulty": "medium",
"overall_difficulty": "hard",
"comments": "",
"intent_template_id": 86
}
]