trace_synthesis/tasks/530.json
yuyr a84d51a101 1. 增加r1生成综合策略代码和输出;
2. 增加tasks;
3. 增加analysis部分,对策略进行归纳分类,然后进行评测。
2025-04-17 17:40:15 +08:00

40 lines
1.3 KiB
JSON

{
"sites": [
"shopping"
],
"task_id": 530,
"require_login": true,
"storage_state": "./.auth/shopping_state.json",
"start_url": "http://localhost:28082",
"geolocation": null,
"intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet",
"instantiation_dict": {
"product": "kitchen organizer",
"time": "around Feb 2023"
},
"intent": "Draft a refund message via their \"contact us\" form for the kitchen organizer I bought around Feb 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet",
"require_reset": false,
"eval": {
"eval_types": [
"program_html"
],
"reference_answers": null,
"reference_url": "",
"program_html": [
{
"url": "last",
"locator": "document.querySelector('[title=\"What\u2019s on your mind?\"]').value",
"required_contents": {
"must_include": [
"refund",
"broke",
"three days of use",
"000000161",
"68.88"
]
}
}
]
},
"intent_template_id": 154
}