64 lines
2.1 KiB
Python
64 lines
2.1 KiB
Python
"""Replace the website placeholders with website domains from env_config
|
|
Generate the test data"""
|
|
import json
|
|
import os
|
|
|
|
from browser_env.env_config import *
|
|
|
|
|
|
def main() -> None:
    """Resolve site placeholders in the raw task configs and split them up.

    The ``DATASET`` environment variable selects which raw config files are
    processed and which ``__NAME__`` placeholders are substituted with the
    site URL constants brought in by the ``browser_env.env_config`` star
    import.  For every raw file ``<name>.raw.json`` this writes:

    * ``<name>.json`` -- the raw text with every placeholder substituted;
    * ``<name>/<idx>.json`` -- one indented JSON file per top-level item.

    Raises:
        KeyError: if ``DATASET`` is not set in the environment.
        ValueError: if ``DATASET`` is neither ``webarena`` nor
            ``visualwebarena``.
        json.JSONDecodeError: if a substituted config is not valid JSON.
    """
    dataset = os.environ["DATASET"]
    if dataset == "webarena":
        print("DATASET: webarena")
        print(f"REDDIT: {REDDIT}")
        print(f"SHOPPING: {SHOPPING}")
        print(f"SHOPPING_ADMIN: {SHOPPING_ADMIN}")
        print(f"GITLAB: {GITLAB}")
        print(f"WIKIPEDIA: {WIKIPEDIA}")
        print(f"MAP: {MAP}")

        inp_paths = [
            "config_files/wa/test_webarena.raw.json",
            "config_files/wa/test_webarena_lite.raw.json",
        ]
        replace_map = {
            "__REDDIT__": REDDIT,
            "__SHOPPING__": SHOPPING,
            "__SHOPPING_ADMIN__": SHOPPING_ADMIN,
            "__GITLAB__": GITLAB,
            "__WIKIPEDIA__": WIKIPEDIA,
            "__MAP__": MAP,
        }
    elif dataset == "visualwebarena":
        print("DATASET: visualwebarena")
        print(f"CLASSIFIEDS: {CLASSIFIEDS}")
        print(f"REDDIT: {REDDIT}")
        print(f"SHOPPING: {SHOPPING}")
        inp_paths = [
            "config_files/vwa/test_classifieds.raw.json",
            "config_files/vwa/test_shopping.raw.json",
            "config_files/vwa/test_reddit.raw.json",
        ]
        replace_map = {
            "__REDDIT__": REDDIT,
            "__SHOPPING__": SHOPPING,
            "__WIKIPEDIA__": WIKIPEDIA,
            "__CLASSIFIEDS__": CLASSIFIEDS,
        }
    else:
        raise ValueError(f"Dataset not implemented: {dataset}")

    for inp_path in inp_paths:
        _resolve_config(inp_path, replace_map)


def _resolve_config(inp_path: str, replace_map: dict) -> None:
    """Substitute placeholders in one raw config file and write the results.

    Args:
        inp_path: Path of a ``*.raw.json`` file holding a JSON array.
        replace_map: Mapping from placeholder text to its replacement string.
    """
    # Read and write as UTF-8 explicitly; the locale default encoding is not
    # UTF-8 on every platform and would corrupt non-ASCII task text.
    with open(inp_path, "r", encoding="utf-8") as f:
        raw = f.read()
    for placeholder, url in replace_map.items():
        raw = raw.replace(placeholder, url)

    # Parse before touching the filesystem so malformed input fails early
    # instead of leaving a partially generated output behind.
    data = json.loads(raw)

    output_dir = inp_path.replace(".raw.json", "")
    os.makedirs(output_dir, exist_ok=True)

    # Combined file: same name as the input with the ".raw" marker removed.
    with open(inp_path.replace(".raw", ""), "w", encoding="utf-8") as f:
        f.write(raw)

    # One file per top-level item, named by its position in the array.
    for idx, item in enumerate(data):
        item_path = os.path.join(output_dir, f"{idx}.json")
        with open(item_path, "w", encoding="utf-8") as f:
            json.dump(item, f, indent=2)
|
|
|
|
|
|
# Script entry point: generate the configs only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|