% VisualWebArena preprint (Koh et al., 2024).
% The arXiv identifier below replaces the former "24xx.xxxxx" placeholder.
% It is kept in `journal` so classic .bst styles (which ignore eprint fields)
% still print it, and mirrored in eprint/archiveprefix for eprint-aware
% styles and reference-management tooling.
% NOTE(review): confirm identifier 2401.13649 against the arXiv listing.
@article{koh2024visualwebarena,
  author        = {Koh, Jing Yu and Lo, Robert and Jang, Lawrence and Duvvur, Vikram and Lim, Ming Chong and Huang, Po-Yu and Neubig, Graham and Zhou, Shuyan and Salakhutdinov, Ruslan and Fried, Daniel},
  title         = {{VisualWebArena}: Evaluating Multimodal Agents on Realistic Visual Web Tasks},
  journal       = {arXiv preprint arXiv:2401.13649},
  year          = {2024},
  eprint        = {2401.13649},
  archiveprefix = {arXiv},
}