79 lines
2.4 KiB
Python
79 lines
2.4 KiB
Python
import json
|
|
import re
|
|
|
|
# Universal Strategy definitions: universal-strategy id -> human-readable label.
us_definitions = {
    "US1": "Navigate To Page/Section",
    "US2": "Search/Filter/Sort Data",
    "US3": "Configure Parameters/Settings",
    "US4": "Execute Action/Process",
    "US5": "View/Inspect Item Details",
    "US6": "Extract/Retrieve Information",
    "US7": "Analyze/Evaluate/Verify Data",
    "US8": "Navigate Within Data/Results",
    "US9": "Create Item",
    "US10": "Update/Modify Item",
    "US11": "Delete Item",
}

# File paths
# NOTE(review): "uinseral" looks like a typo for "universal"; the literal is
# kept unchanged because it has to match the file name on disk -- confirm.
md_file = '/mnt/data/uinseral_strategy_v2.md'
json_file = '/mnt/data/test.raw.json'
output_file = '/mnt/data/tasks_output.json'

# One cell of a markdown table delimiter row, e.g. "---", ":---" or ":---:".
_SEPARATOR_CELL = re.compile(r":?-+:?")


def extract_mapping_rows(lines):
    """Return the stripped table rows that follow the mapping-table header.

    The header is the first line that starts with "| task id" and contains
    "original strategy". The header itself is not returned. Collection stops
    at the first subsequent line that does not start with "|".

    Args:
        lines: Iterable of text lines from the markdown file.

    Returns:
        List of stripped row strings. The markdown delimiter row
        ("|---|---|...") is still included if the table has one.
    """
    rows = []
    in_mapping = False
    for line in lines:
        stripped = line.strip()
        if stripped.startswith("| task id") and "original strategy" in line:
            in_mapping = True
            continue
        if not in_mapping:
            continue
        if not stripped.startswith("|"):
            break
        rows.append(stripped)
    return rows


def build_tasks_map(rows, definitions):
    """Group the mapping-table rows by integer task id.

    Rows that do not have exactly four cells are ignored, as is the markdown
    delimiter row. The delimiter row is recognised by its content rather than
    by position: the header row never reaches this function, so slicing off a
    fixed number of leading rows would discard real data.

    Args:
        rows: Row strings as returned by ``extract_mapping_rows``.
        definitions: Mapping from universal-strategy id to its label.

    Returns:
        Dict mapping task id to a list of dicts with the keys "strategy",
        "universal_strategy" and "universal_strategy_id", in table order.

    Raises:
        ValueError: If a four-cell data row has a non-integer task id.
    """
    tasks_map = {}
    for row in rows:
        parts = [cell.strip() for cell in row.strip('|').split('|')]
        if len(parts) != 4:
            continue
        if all(_SEPARATOR_CELL.fullmatch(cell) for cell in parts):
            continue
        # The label written in the table is ignored; the canonical label is
        # looked up from `definitions` by id instead.
        task_id, orig_strategy, _table_label, uni_id = parts
        tasks_map.setdefault(int(task_id), []).append({
            "strategy": orig_strategy,
            "universal_strategy": definitions.get(uni_id, ""),
            "universal_strategy_id": uni_id,
        })
    return tasks_map


def assemble_output(tasks_map, tests):
    """Join the per-task strategies with the matching raw test entries.

    Tasks with no matching test entry are omitted. When several test entries
    share a task id, the first one is used.

    Args:
        tasks_map: Result of ``build_tasks_map``.
        tests: List of test dicts loaded from the raw JSON file; each is
            looked up by its "task_id" key.

    Returns:
        List of output dicts, one per task that has a test entry, in the
        order the task ids first appeared in the mapping table.
    """
    # Index once instead of scanning `tests` for every task id.
    tests_by_id = {}
    for test in tests:
        tests_by_id.setdefault(test.get('task_id'), test)

    output = []
    for tid, entries in tasks_map.items():
        test_entry = tests_by_id.get(tid)
        if not test_entry:
            continue
        # A missing (or null) "eval" yields an empty annotation instead of a
        # KeyError, in line with the defaults used for the other fields.
        eval_info = test_entry.get("eval") or {}
        output.append({
            "task_id": tid,
            "sites": test_entry.get("sites", []),
            "intent": test_entry.get("intent", ""),
            "strategies": [e["strategy"] for e in entries],
            "universal_strategies": [e["universal_strategy"] for e in entries],
            "universal_strategy_ids": [e["universal_strategy_id"] for e in entries],
            "reference_answer_raw_annotation": eval_info.get("reference_answer_raw_annotation", ""),
        })
    return output


def main():
    """Read the markdown mapping and raw tests, then write the merged JSON."""
    # Read and parse the markdown mapping table
    with open(md_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    tasks_map = build_tasks_map(extract_mapping_rows(lines), us_definitions)

    # Load the raw JSON file
    with open(json_file, 'r', encoding='utf-8') as f:
        tests = json.load(f)

    # Write the output JSON
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(assemble_output(tasks_map, tests), f, indent=4, ensure_ascii=False)


if __name__ == "__main__":
    main()
|