"""Merge a markdown strategy-mapping table with raw test records.

The script reads a markdown table whose header starts with ``| task id`` and
contains ``original strategy``, groups its rows by task id, joins each task
with the matching record from a JSON list of tests, and writes the combined
records to an output JSON file.
"""
import json
import re

# Universal Strategy definitions: maps a universal strategy id to its label.
us_definitions = {
    "US1": "Navigate To Page/Section",
    "US2": "Search/Filter/Sort Data",
    "US3": "Configure Parameters/Settings",
    "US4": "Execute Action/Process",
    "US5": "View/Inspect Item Details",
    "US6": "Extract/Retrieve Information",
    "US7": "Analyze/Evaluate/Verify Data",
    "US8": "Navigate Within Data/Results",
    "US9": "Create Item",
    "US10": "Update/Modify Item",
    "US11": "Delete Item",
}

# File paths
# NOTE(review): 'uinseral' looks like a typo of 'universal' -- confirm the
# on-disk filename before changing it; the string is kept as-is here.
md_file = '/mnt/data/uinseral_strategy_v2.md'
json_file = '/mnt/data/test.raw.json'
output_file = '/mnt/data/tasks_output.json'

# A markdown table delimiter cell: dashes with optional alignment colons.
_SEPARATOR_CELL = re.compile(r":?-+:?")


def _split_row(row):
    """Split one markdown table row into its stripped cell texts."""
    return [cell.strip() for cell in row.strip().strip('|').split('|')]


def _is_separator_row(cells):
    """Return True when every cell is a delimiter cell such as ``---``."""
    return bool(cells) and all(_SEPARATOR_CELL.fullmatch(c) for c in cells)


def _read_mapping_rows(lines):
    """Collect the table rows that follow the mapping-table header.

    Scanning starts after the first line that begins with ``| task id`` and
    contains ``original strategy``; it stops at the first subsequent line
    that does not start with ``|``. The header line itself is not returned.
    """
    rows = []
    in_mapping = False
    for line in lines:
        stripped = line.strip()
        if not in_mapping:
            if stripped.startswith("| task id") and "original strategy" in line:
                in_mapping = True
            continue
        if not stripped.startswith("|"):
            break
        rows.append(stripped)
    return rows


def _build_tasks_map(rows):
    """Group table rows by integer task id, preserving row order.

    Rows are skipped when they do not have exactly four cells, when they are
    the header/body separator, or when the task-id cell is not an integer.
    The universal strategy label is looked up from ``us_definitions`` by id
    (empty string for an unknown id); the label cell of the table is ignored.
    """
    tasks_map = {}
    for row in rows:
        cells = _split_row(row)
        # The separator is detected by content, not by position: the header
        # was already dropped by the reader, so slicing off a fixed number of
        # rows would discard the first real data row.
        if len(cells) != 4 or _is_separator_row(cells):
            continue
        task_id, orig_strategy, _uni_strategy, uni_id = cells
        try:
            tid = int(task_id)
        except ValueError:
            # Malformed id: skip it like any other malformed row.
            continue
        tasks_map.setdefault(tid, []).append({
            "strategy": orig_strategy,
            "universal_strategy": us_definitions.get(uni_id, ""),
            "universal_strategy_id": uni_id,
        })
    return tasks_map


def _assemble_output(tasks_map, tests):
    """Join each mapped task with its test record.

    Tasks without a matching ``task_id`` in ``tests`` are omitted. When
    several test records share a ``task_id`` the first one is used.
    """
    tests_by_id = {}
    for test in tests:
        try:
            tests_by_id.setdefault(test.get("task_id"), test)
        except TypeError:
            # An unhashable task_id can never equal an integer task id.
            continue

    output = []
    for tid, entries in tasks_map.items():
        test_entry = tests_by_id.get(tid)
        if test_entry is None:
            continue
        # A missing or null "eval" section yields an empty annotation,
        # matching the defaulting used for the other fields.
        eval_section = test_entry.get("eval") or {}
        output.append({
            "task_id": tid,
            "sites": test_entry.get("sites", []),
            "intent": test_entry.get("intent", ""),
            "strategies": [e["strategy"] for e in entries],
            "universal_strategies": [e["universal_strategy"] for e in entries],
            "universal_strategy_ids": [
                e["universal_strategy_id"] for e in entries
            ],
            "reference_answer_raw_annotation": eval_section.get(
                "reference_answer_raw_annotation", ""
            ),
        })
    return output


def main(md_path=md_file, json_path=json_file, out_path=output_file):
    """Build the task list from the input files and write it as JSON.

    Args:
        md_path: Markdown file containing the mapping table.
        json_path: JSON file containing a list of test records.
        out_path: Destination path for the combined JSON output.

    Returns:
        The list of combined task records that was written to ``out_path``.
    """
    # Read and parse the markdown mapping table
    with open(md_path, 'r', encoding='utf-8') as f:
        tasks_map = _build_tasks_map(_read_mapping_rows(f))

    # Load the raw JSON file
    with open(json_path, 'r', encoding='utf-8') as f:
        tests = json.load(f)

    # Assemble the final output list
    output = _assemble_output(tasks_map, tests)

    # Write the output JSON
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=4, ensure_ascii=False)
    return output


if __name__ == "__main__":
    main()