update
This commit is contained in:
parent
4b8dbfd9c5
commit
c078ba6292
|
@ -70,7 +70,7 @@ env:
|
||||||
prune: true
|
prune: true
|
||||||
max_browser_rows: 500
|
max_browser_rows: 500
|
||||||
headless: True
|
headless: True
|
||||||
task_ids: ["stanford_cs_head", 65]
|
task_ids: ["Allrecipes--3", 65]
|
||||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||||
|
|
|
@ -18,7 +18,7 @@ env:
|
||||||
max_env_steps: 20
|
max_env_steps: 20
|
||||||
max_browser_rows: 500
|
max_browser_rows: 500
|
||||||
headless: True
|
headless: True
|
||||||
task_ids: ["stanford_cs_head", 65]
|
task_ids: ["Allrecipes--3", 65]
|
||||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||||
|
|
|
@ -70,7 +70,7 @@ env:
|
||||||
prune: true
|
prune: true
|
||||||
max_browser_rows: 500
|
max_browser_rows: 500
|
||||||
headless: True
|
headless: True
|
||||||
task_ids: ["stanford_cs_head", 65]
|
task_ids: ["Allrecipes--3", 65]
|
||||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||||
|
|
|
@ -18,7 +18,7 @@ env:
|
||||||
max_env_steps: 20
|
max_env_steps: 20
|
||||||
max_browser_rows: 500
|
max_browser_rows: 500
|
||||||
headless: True
|
headless: True
|
||||||
task_ids: ["stanford_cs_head", 65]
|
task_ids: ["Allrecipes--3", 65]
|
||||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||||
|
|
|
@ -70,7 +70,7 @@ env:
|
||||||
prune: true
|
prune: true
|
||||||
max_browser_rows: 500
|
max_browser_rows: 500
|
||||||
headless: True
|
headless: True
|
||||||
task_ids: ["stanford_cs_head", 65]
|
task_ids: ["Allrecipes--3", 65]
|
||||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||||
|
|
|
@ -70,7 +70,7 @@ env:
|
||||||
prune: true
|
prune: true
|
||||||
max_browser_rows: 500
|
max_browser_rows: 500
|
||||||
headless: True
|
headless: True
|
||||||
task_ids: ["stanford_cs_head", 65]
|
task_ids: ["Allrecipes--3", 65]
|
||||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||||
|
|
|
@ -70,7 +70,7 @@ env:
|
||||||
prune: false
|
prune: false
|
||||||
max_browser_rows: 500
|
max_browser_rows: 500
|
||||||
headless: True
|
headless: True
|
||||||
task_ids: ["stanford_cs_head", 65]
|
task_ids: ["Allrecipes--3", 65]
|
||||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||||
|
|
|
@ -70,7 +70,7 @@ env:
|
||||||
prune: false
|
prune: false
|
||||||
max_browser_rows: 500
|
max_browser_rows: 500
|
||||||
headless: True
|
headless: True
|
||||||
task_ids: ["stanford_cs_head", 65]
|
task_ids: ["Allrecipes--3", 65]
|
||||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||||
|
|
31
README.md
31
README.md
|
@ -1,17 +1,15 @@
|
||||||
# AgentOccam
|
# AgentOccam
|
||||||
Code for "[AgentOccam: A Simple Yet Strong Baseline for LLM-Based Web Agents]()".
|
Code for "[AgentOccam: A Simple Yet Strong Baseline for LLM-Based Web Agents](https://arxiv.org/abs/2410.13825)" (ICLR 2025).
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
We work on automating web tasks! 🏄🏄🏄 We refine the LLM-based web agents by aligning their observation and action space with the capabilities of LLMs.
|
AgentOccam offers a simple but strong baseline for LLM-based web agents. Given a URL and the task you want performed, AgentOccam can execute it for you. Its simplicity and effectiveness allow you to run it directly or to integrate it into a larger pipeline that needs to execute web tasks, such as retrieving information from the web before processing the documents.
|
||||||
|
|
||||||
The newly designed agent AgentOccam surpasses previous state-of-the-art methods and concurrent work significantly w/o in-context examples, new agent roles, online feedback or search strategies on [WebArena](https://webarena.dev), a benchmark featuring general-purpose web tasks. 🍺
|
**Without using in-context examples, new agent roles, online feedback, or search strategies**, AgentOccam demonstrates impressive performance on tasks in WebArena (a web simulator benchmark with tasks from sites like shopping, shopping admin, GitLab, Reddit, map, etc.) and tasks with golden answers in WebVoyager (a benchmark based on real web tasks), once surpassing the SOTA on both leaderboards.
|
||||||
|
|
||||||
We shed light on LLMs' impressive zero-shot performance on web tasks, and the critical role of carefully tuning observation and action spaces for LLM-based agents. 🧙
|
In brief, our approach aligns the input (webpage descriptions, i.e., agent observations) and output (action strings that can be translated into web interactions, i.e., agent actions) of web tasks with the tasks that LLMs are most familiar with, such as reading comprehension and question answering. We refer to our approach as **agent observation and action space alignment**; it sheds light on LLMs' impressive zero-shot performance on web tasks and on **the critical role of carefully tuning observation and action spaces for LLM-based agents**.
|
||||||
|
|
||||||
You can let AgentOccam interact with other websites like Google per your requests by defining the task config files, as seen in the example in `config_files/tasks/standford_cs_head.json`. Have fun playing with it! :)
|
You can let AgentOccam interact with other websites by defining task config files, as seen in the example in `config_files/tasks/Allrecipes--3.json`. Have fun playing with it! :)
|
||||||
|
|
||||||
*Please check whether reddit post exceeds limits, login expires, or any other webarena simulator/website failure exists when you finish one round. You should restart the simluator/relogin to the websites and rerun those tasks before reporting your final success rate. Additionally, LLM policy varies even given the same task as the generation temperature is set to >0 for more diverse exploration. Therefore, it is expected that you can get difference traces when starting the same task multiple times. Try it out with the basic `config_files/tasks/standford_cs_head.json`!*
|
|
||||||
|
|
||||||
## WebArena Replication
|
## WebArena Replication
|
||||||
### Environment Setup
|
### Environment Setup
|
||||||
|
@ -31,7 +29,7 @@ mkdir .auth
|
||||||
```
|
```
|
||||||
|
|
||||||
### Experiments
|
### Experiments
|
||||||
#### AgentOccam-Series and SteP-Replication
|
#### AgentOccam-Series and SteP-Replication (Please refer to SteP's official repo for their latest agent code.)
|
||||||
* Connect to the WebArena host server.
|
* Connect to the WebArena host server.
|
||||||
* Export the env configs:
|
* Export the env configs:
|
||||||
```bash
|
```bash
|
||||||
|
@ -43,7 +41,7 @@ export MAP="http://<webarena_server_address>:3000"
|
||||||
export WIKIPEDIA="http://<webarena_server_address>:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
|
export WIKIPEDIA="http://<webarena_server_address>:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
|
||||||
export HOMEPAGE="http://<webarena_server_address>:4399"
|
export HOMEPAGE="http://<webarena_server_address>:4399"
|
||||||
export OPENAI_API_KEY="<openai_api_key>"
|
export OPENAI_API_KEY="<openai_api_key>"
|
||||||
export GEMINI_API_KEY="<gemini_api_key>"
|
export GEMINI_API_KEY="<gemini_api_key>" # Optional, we provide several other agent base models, such as Claude and LLaMa.
|
||||||
```
|
```
|
||||||
* Login in:
|
* Log in:
|
||||||
```bash
|
```bash
|
||||||
|
@ -54,10 +52,16 @@ python browser_env/auto_login.py
|
||||||
python eval_webarena.py --config AgentOccam/configs/AgentOccam.yml # Replace the yml config with your target one.
|
python eval_webarena.py --config AgentOccam/configs/AgentOccam.yml # Replace the yml config with your target one.
|
||||||
```
|
```
|
||||||
*You can use directly run `bash script/run_config.sh` after replacing the experiment configurations.*
|
*You can directly run `bash script/run_config.sh` after replacing the experiment configurations.*
|
||||||
#### WebArena-Replication
|
|
||||||
|
*When you finish one round on WebArena, please check whether the Reddit post limit has been exceeded, the login has expired, or any other WebArena simulator/website failure has occurred. Additionally, the LLM policy varies even given the same task, as the generation temperature is set to >0 for more diverse exploration. Therefore, it is expected that you can get different traces when starting the same task multiple times. Try it out with the basic `config_files/tasks/Allrecipes--3.json`.*
|
||||||
|
#### WebArena-Agent (Please refer to WebArena's official repo for their latest agent code.)
|
||||||
```bash
|
```bash
|
||||||
bash scripts/run_webarena.sh
|
bash scripts/run_webarena.sh
|
||||||
```
|
```
|
||||||
|
### Trajectories
|
||||||
|
Placed at [this link](https://drive.google.com/drive/folders/1MjnDIlfPGPjMFszirQO46fdP4LkH6669?usp=sharing).
|
||||||
|
### Human Assessment
|
||||||
|
Placed at `files/human_assessment/WebArena-AgentOccam.csv`.
|
||||||
|
|
||||||
## WebVoyager Replication
|
## WebVoyager Replication
|
||||||
### Environment Setup
|
### Environment Setup
|
||||||
|
@ -82,11 +86,14 @@ cd ../AgentOccam
|
||||||
```bash
|
```bash
|
||||||
python eval_webarena.py --config AgentOccam/configs/AgentOccam-WebVoyager.yml
|
python eval_webarena.py --config AgentOccam/configs/AgentOccam-WebVoyager.yml
|
||||||
```
|
```
|
||||||
#### Agent-E
|
#### Agent-E (Please refer to Agent-E's official repo for their latest agent code.)
|
||||||
```bash
|
```bash
|
||||||
python -m agente_replication --task_ids Allrecipes--3
|
python -m agente_replication --task_ids Allrecipes--3
|
||||||
```
|
```
|
||||||
|
### Trajectories
|
||||||
|
Placed at [this link](https://drive.google.com/drive/folders/1d5xPdOhYWjDTJqLmehdXs2AbtWCc4SXB?usp=sharing).
|
||||||
|
### Human Assessment
|
||||||
|
Placed at `files/human_assessment/WebVoyager-AgentOccam_*.csv`.
|
||||||
|
|
||||||
|
|
||||||
## Agent Configuration Explanation
|
## Agent Configuration Explanation
|
||||||
|
|
20
config_files/tasks/Allrecipes--3.json
Normal file
20
config_files/tasks/Allrecipes--3.json
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"sites": [
|
||||||
|
"allrecipes"
|
||||||
|
],
|
||||||
|
"task_id": "Allrecipes--3",
|
||||||
|
"require_login": false,
|
||||||
|
"storage_state": null,
|
||||||
|
"start_url": "https://www.allrecipes.com/",
|
||||||
|
"geolocation": null,
|
||||||
|
"intent": "Locate a recipe for vegan chocolate chip cookies with over 60 reviews and a rating of at least 4.5 stars on Allrecipes.",
|
||||||
|
"require_reset": false,
|
||||||
|
"eval": {
|
||||||
|
"eval_types": [
|
||||||
|
"string_match"
|
||||||
|
],
|
||||||
|
"reference_answers": {
|
||||||
|
"fuzzy_match": "'Vegan Chocolate Chip, Oatmeal, and Nut Cookies', 4.9 star, 67 viewers (> 60)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,20 +0,0 @@
|
||||||
{
|
|
||||||
"sites": [
|
|
||||||
"google"
|
|
||||||
],
|
|
||||||
"task_id": "stanford_cs_head",
|
|
||||||
"require_login": false,
|
|
||||||
"storage_state": null,
|
|
||||||
"start_url": "https://www.google.com/",
|
|
||||||
"geolocation": null,
|
|
||||||
"intent": "What is the email address of the Dean of the School of Engineering at Stanford University?",
|
|
||||||
"require_reset": false,
|
|
||||||
"eval": {
|
|
||||||
"eval_types": [
|
|
||||||
"string_match"
|
|
||||||
],
|
|
||||||
"reference_answers": {
|
|
||||||
"exact_match": "widom@stanford.edu"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,20 +0,0 @@
|
||||||
{
|
|
||||||
"sites": [
|
|
||||||
"google"
|
|
||||||
],
|
|
||||||
"task_id": "tsinghua_da_head",
|
|
||||||
"require_login": false,
|
|
||||||
"storage_state": null,
|
|
||||||
"start_url": "https://www.google.com/",
|
|
||||||
"geolocation": null,
|
|
||||||
"intent": "帮我查一下清华大学自动化系系主任的邮箱",
|
|
||||||
"require_reset": false,
|
|
||||||
"eval": {
|
|
||||||
"eval_types": [
|
|
||||||
"string_match"
|
|
||||||
],
|
|
||||||
"reference_answers": {
|
|
||||||
"exact_match": "taozhang@tsinghua.edu.cn"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -3809,7 +3809,8 @@
|
||||||
"locator": "",
|
"locator": "",
|
||||||
"required_contents": {
|
"required_contents": {
|
||||||
"must_include": [
|
"must_include": [
|
||||||
"jaw bruxism",
|
"jaw",
|
||||||
|
"bruxism",
|
||||||
"mouth guard"
|
"mouth guard"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -28661,7 +28662,7 @@
|
||||||
],
|
],
|
||||||
"reference_answers": {
|
"reference_answers": {
|
||||||
"must_include": [
|
"must_include": [
|
||||||
"412"
|
"414"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"reference_url": "",
|
"reference_url": "",
|
||||||
|
|
814
files/human_assessment/WebArena-AgentOccam.csv
Normal file
814
files/human_assessment/WebArena-AgentOccam.csv
Normal file
|
@ -0,0 +1,814 @@
|
||||||
|
Task ID,Alice (Correct: 1; Partially Correct: 0.5; Incorrect: 0),Alice's Note,Bob,Bob's Note,Colin,Colin's Note,Average
|
||||||
|
0,0.5,,1.00,,1,,0.833333333
|
||||||
|
1,1,,1.00,,1,,1
|
||||||
|
2,0.5,,0.50,product type refers to the general category not the products,1,,0.666666667
|
||||||
|
3,1,,1.00,,1,,1
|
||||||
|
4,1,,1.00,,1,,1
|
||||||
|
5,1,,0.50,product type is different from product,1,,0.833333333
|
||||||
|
6,1,,1.00,,1,"Uncertainty exists for 0-6, as the query time cannot be determined.",1
|
||||||
|
7,0,,0.00,"failed to click ""Go"" due to the FROM field wasn't actually filled",0,,0
|
||||||
|
8,1,,0.50,identify there's no airport within 5km but incorrectly identify the nearst international airport prob due to openstreet api seatch function,0,,0.5
|
||||||
|
9,1,,1.00,,0,,0.666666667
|
||||||
|
10,0,,0.50,identify the limitation of the openstreet api but didn't give correct answer for this task,0,,0.166666667
|
||||||
|
11,1,,0.00,didn't type and search in the correct field,1,,0.666666667
|
||||||
|
12,0,,0.00,didn't go to the correct tab,0,,0
|
||||||
|
13,1,,1.00,,1,,1
|
||||||
|
14,0,,0.00,didn't type and search in the correct field,0,,0
|
||||||
|
15,1,,1.00,,1,,1
|
||||||
|
16,1,,1.00,,1,,1
|
||||||
|
17,1,,1.00,,1,,1
|
||||||
|
18,1,,1.00,,1,,1
|
||||||
|
19,1,,1.00,,1,,1
|
||||||
|
20,1,,1.00,,1,,1
|
||||||
|
21,1,,1.00,,0.5,,0.833333333
|
||||||
|
22,1,,1.00,,0.5,,0.833333333
|
||||||
|
23,1,,1.00,,0.5,,0.833333333
|
||||||
|
24,1,,1.00,,1,,1
|
||||||
|
25,1,,0.50,can't detect implicitly mentioned related to average print quality,1,,0.833333333
|
||||||
|
26,1,,0.00,filed to identify customer service complaints,1,,0.666666667
|
||||||
|
27,0,,0.00,,0,,0
|
||||||
|
28,0,q,0.00,didn't quite perfomed the task related to that forum,1,,0.333333333
|
||||||
|
29,0,q,0.00,,0,,0
|
||||||
|
30,0,error,0.00,,0,,0
|
||||||
|
31,0,,0.00,,0,,0
|
||||||
|
32,1,,1.00,,1,,1
|
||||||
|
33,0,error,0.00,tried to go to Google Map but failed,0,,0
|
||||||
|
34,0,,0.00,,0,,0
|
||||||
|
35,0,,0.00,,0,,0
|
||||||
|
36,1,,1.00,,1,,1
|
||||||
|
37,0,,0.00,,0,,0
|
||||||
|
38,1,,1.00,,1,,1
|
||||||
|
39,0,,0.50,openstreet api,0,,0.166666667
|
||||||
|
40,0,,0.50,,0,,0.166666667
|
||||||
|
41,1,,1.00,,0,,0.666666667
|
||||||
|
42,1,,1.00,,1,,1
|
||||||
|
43,0,,1.00,since nike has no result in the store,1,,0.666666667
|
||||||
|
44,1,,1.00,,0.5,,0.833333333
|
||||||
|
45,1,,1.00,,1,,1
|
||||||
|
46,1,,1.00,,0.5,,0.833333333
|
||||||
|
47,0,,1.00,,1,,0.666666667
|
||||||
|
48,1,,1.00,,1,,1
|
||||||
|
49,0,,1.00,,1,,0.666666667
|
||||||
|
50,0,,0.00,should be more than 16,1,,0.333333333
|
||||||
|
51,0,,0.50,included pending orders,1,,0.5
|
||||||
|
52,1,,1.00,,1,,1
|
||||||
|
53,1,,1.00,,1,,1
|
||||||
|
54,1,,1.00,,1,,1
|
||||||
|
55,0,,0.50,,0,,0.166666667
|
||||||
|
56,0,,0.50,,0.5,,0.333333333
|
||||||
|
57,1,,1.00,,1,,1
|
||||||
|
58,0,,1.00,,0,,0.333333333
|
||||||
|
59,0,,1.00,,0,,0.333333333
|
||||||
|
60,0,,1.00,,0,,0.333333333
|
||||||
|
61,0,,1.00,,0,,0.333333333
|
||||||
|
62,1,,1.00,,1,,1
|
||||||
|
63,1,,1.00,,1,,1
|
||||||
|
64,0,,0.00,,0.5,,0.166666667
|
||||||
|
65,0,,0.00,,0,,0
|
||||||
|
66,1,,0.50,didnt identify all,1,,0.833333333
|
||||||
|
67,1,,0.50,,1,,0.833333333
|
||||||
|
68,1,,0.50,,0.5,,0.666666667
|
||||||
|
69,1,,1.00,,1,,1
|
||||||
|
70,1,,1.00,,1,,1
|
||||||
|
71,1,,1.00,,1,,1
|
||||||
|
72,0.5,,0.00,,1,,0.5
|
||||||
|
73,1,,1.00,,1,,1
|
||||||
|
74,0.5,,0.00,wrong order,1,,0.5
|
||||||
|
75,0.5,,0.00,,0.5,,0.333333333
|
||||||
|
76,0.5,,1.00,,0.5,,0.666666667
|
||||||
|
77,1,,0.00,,1,,0.666666667
|
||||||
|
78,0,,0.00,,1,,0.333333333
|
||||||
|
79,0,,0.00,,1,,0.333333333
|
||||||
|
80,0,,0.00,Starbuck on Craig St?,1,,0.333333333
|
||||||
|
81,0,,0.00,,0,,0
|
||||||
|
82,1,,1.00,,1,,1
|
||||||
|
83,0,,0.00,,0,,0
|
||||||
|
84,1,,1.00,,1,,1
|
||||||
|
85,1,,1.00,use airport to estimate,1,,1
|
||||||
|
86,1,,1.00,,1,,1
|
||||||
|
87,1,,1.00,,1,,1
|
||||||
|
88,1,,1.00,,1,,1
|
||||||
|
89,0,,0.00,unable to access Wiki,0,,0
|
||||||
|
90,0.5,,1.00,,1,,0.833333333
|
||||||
|
91,0.5,,1.00,,1,,0.833333333
|
||||||
|
92,0.5,,1.00,,1,,0.833333333
|
||||||
|
93,0,,1.00,,1,,0.666666667
|
||||||
|
94,1,,1.00,,1,,1
|
||||||
|
95,1,,1.00,,1,,1
|
||||||
|
96,1,,1.00,,0.5,,0.833333333
|
||||||
|
97,1,,1.00,,1,,1
|
||||||
|
98,1,,0.50,seems there are nearer?,1,,0.833333333
|
||||||
|
99,0,,0.00,,0,,0
|
||||||
|
100,0,,0.00,,0,,0
|
||||||
|
101,0,,0.00,,0,,0
|
||||||
|
102,0.5,,0.00,help needed / help wanted,0,,0.166666667
|
||||||
|
103,0,,0.00,,0,,0
|
||||||
|
104,0,,0.00,,0,,0
|
||||||
|
105,0,,0.00,,0.5,,0.166666667
|
||||||
|
106,0,,0.00,,0,,0
|
||||||
|
107,0.5,,0.00,,1,,0.5
|
||||||
|
108,0.5,,0.00,,1,,0.5
|
||||||
|
109,0.5,,0.00,,0.5,,0.333333333
|
||||||
|
110,0.5,,0.00,,0,,0.166666667
|
||||||
|
111,0.5,,0.50,didnt count right,1,,0.666666667
|
||||||
|
112,0,,0.00,,0.5,,0.166666667
|
||||||
|
113,0,,0.00,,0.5,,0.166666667
|
||||||
|
114,0,,0.00,,0.5,,0.166666667
|
||||||
|
115,0,,0.00,,0.5,,0.166666667
|
||||||
|
116,0.5,,0.00,,0.5,,0.333333333
|
||||||
|
117,0,,0.00,,0,,0
|
||||||
|
118,1,,1.00,,1,,1
|
||||||
|
119,1,,1.00,,1,,1
|
||||||
|
120,1,,1.00,,1,,1
|
||||||
|
121,1,,0.50,also included why people dont like it,1,,0.833333333
|
||||||
|
122,1,,1.00,,1,,1
|
||||||
|
123,0,,0.00,,1,,0.333333333
|
||||||
|
124,0,,0.00,only examined one page,0.5,,0.166666667
|
||||||
|
125,0,,0.00,,1,,0.333333333
|
||||||
|
126,0,,0.00,,0.5,,0.166666667
|
||||||
|
127,1,,1.00,,1,,1
|
||||||
|
128,1,,1.00,,1,,1
|
||||||
|
129,1,,1.00,,1,,1
|
||||||
|
130,1,,1.00,,1,,1
|
||||||
|
131,0.5,,0.50,didn't finish,1,,0.666666667
|
||||||
|
132,1,,1.00,,1,,1
|
||||||
|
133,1,,1.00,,1,,1
|
||||||
|
134,1,,1.00,,1,,1
|
||||||
|
135,1,,1.00,repetition of action,1,,1
|
||||||
|
136,1,,0.00,,1,,0.666666667
|
||||||
|
137,1,,1.00,,1,,1
|
||||||
|
138,1,,1.00,,1,,1
|
||||||
|
139,1,,1.00,,1,,1
|
||||||
|
140,1,,1.00,,1,,1
|
||||||
|
141,0.5,,0.50,tried to look into one transection type,1,,0.666666667
|
||||||
|
142,0.5,,0.00,,1,,0.5
|
||||||
|
143,0.5,,0.00,,1,,0.5
|
||||||
|
144,0.5,,0.00,,1,,0.5
|
||||||
|
145,0.5,,0.00,,1,,0.5
|
||||||
|
146,1,,1.00,,1,,1
|
||||||
|
147,0,,0.50,,0,,0.166666667
|
||||||
|
148,0,,1.00,,1,,0.666666667
|
||||||
|
149,0.5,,1.00,,1,,0.833333333
|
||||||
|
150,1,,1.00,,1,,1
|
||||||
|
151,1,,1.00,,1,,1
|
||||||
|
152,1,,1.00,,1,,1
|
||||||
|
153,1,,1.00,,1,,1
|
||||||
|
154,1,,1.00,,1,,1
|
||||||
|
155,1,,1.00,,1,,1
|
||||||
|
156,1,,1.00,,1,,1
|
||||||
|
157,1,,1.00,,1,,1
|
||||||
|
158,1,,1.00,,1,,1
|
||||||
|
159,1,,1.00,,1,,1
|
||||||
|
160,1,,1.00,,1,,1
|
||||||
|
161,1,,1.00,,1,,1
|
||||||
|
162,0.5,,1.00,,0,,0.5
|
||||||
|
163,1,,0.00,,1,,0.666666667
|
||||||
|
164,1,,1.00,,1,,1
|
||||||
|
165,1,,1.00,,1,,1
|
||||||
|
166,1,,1.00,,1,,1
|
||||||
|
167,1,,1.00,,1,,1
|
||||||
|
168,0,,0.00,,1,,0.333333333
|
||||||
|
169,0,,1.00,,1,,0.666666667
|
||||||
|
170,0.5,,1.00,,1,,0.833333333
|
||||||
|
171,0,,0.00,,0,,0
|
||||||
|
172,0,,1.00,,1,,0.666666667
|
||||||
|
173,1,,1.00,,1,,1
|
||||||
|
174,0.5,it only looks for open issues instead of all,1.00,,1,,0.833333333
|
||||||
|
175,0.5,,1.00,,1,,0.833333333
|
||||||
|
176,0.5,,1.00,,1,,0.833333333
|
||||||
|
177,0.5,,1.00,,1,,0.833333333
|
||||||
|
178,0.5,,1.00,,1,,0.833333333
|
||||||
|
179,0.5,,1.00,,1,,0.833333333
|
||||||
|
180,0.5,,1.00,,1,,0.833333333
|
||||||
|
181,0.5,,1.00,,1,,0.833333333
|
||||||
|
182,0.5,,1.00,,1,,0.833333333
|
||||||
|
183,1,,1.00,,1,,1
|
||||||
|
184,0,Cookies error,0.50,,1,,0.5
|
||||||
|
185,1,,1.00,,1,,1
|
||||||
|
186,0.5,,0.00,,1,,0.5
|
||||||
|
187,0.5,,0.00,,1,,0.5
|
||||||
|
188,1,,1.00,,1,,1
|
||||||
|
189,1,,1.00,,1,,1
|
||||||
|
190,1,,1.00,,1,,1
|
||||||
|
191,0,,1.00,,1,,0.666666667
|
||||||
|
192,0.5,,0.00,,1,,0.5
|
||||||
|
193,1,,1.00,,1,,1
|
||||||
|
194,1,,0.00,,0.5,,0.5
|
||||||
|
195,0,,0.00,,1,,0.333333333
|
||||||
|
196,1,,0.00,,0.5,,0.5
|
||||||
|
197,0,,0.00,,1,,0.333333333
|
||||||
|
198,1,,0.00,,1,,0.666666667
|
||||||
|
199,0,,0.00,,1,,0.333333333
|
||||||
|
200,0,,0.50,,1,,0.5
|
||||||
|
201,0,,0.00,,0,,0
|
||||||
|
202,1,,1.00,,1,,1
|
||||||
|
203,0,,0.00,,1,,0.333333333
|
||||||
|
204,0.5,,0.00,,1,,0.5
|
||||||
|
205,1,,1.00,,1,,1
|
||||||
|
206,1,,1.00,,1,,1
|
||||||
|
207,1,,1.00,,1,,1
|
||||||
|
208,1,,1.00,,1,,1
|
||||||
|
209,1,,1.00,,1,,1
|
||||||
|
210,1,,1.00,,1,,1
|
||||||
|
211,1,,1.00,,1,,1
|
||||||
|
212,1,,1.00,,1,,1
|
||||||
|
213,0.5,,0.00,,0,,0.166666667
|
||||||
|
214,0.5,"error, can't acess product review",0.00,,1,,0.5
|
||||||
|
215,0.5,,0.00,,1,,0.5
|
||||||
|
216,0.5,,0.00,,0,,0.166666667
|
||||||
|
217,0,,0.00,,1,,0.333333333
|
||||||
|
218,0.5,,0.00,,0.5,,0.333333333
|
||||||
|
219,0,,1.00,,0,,0.333333333
|
||||||
|
220,0,,0.00,,0,,0
|
||||||
|
221,0,,0.00,,0,,0
|
||||||
|
222,0,,0.00,,0,,0
|
||||||
|
223,0,,0.00,,0,,0
|
||||||
|
224,0,,0.00,,0,,0
|
||||||
|
225,0.5,,0.00,,1,,0.5
|
||||||
|
226,0,,0.00,,1,,0.333333333
|
||||||
|
227,1,,1.00,,1,,1
|
||||||
|
228,0.5,,1.00,,1,,0.833333333
|
||||||
|
229,1,,1.00,,1,,1
|
||||||
|
230,0.5,,0.50,,1,,0.666666667
|
||||||
|
231,1,,1.00,,1,,1
|
||||||
|
232,1,,1.00,,1,,1
|
||||||
|
233,1,,1.00,,1,,1
|
||||||
|
234,1,,1.00,,1,,1
|
||||||
|
235,0,,1.00,,1,,0.666666667
|
||||||
|
236,1,,1.00,,1,,1
|
||||||
|
237,1,,0.00,,1,,0.666666667
|
||||||
|
238,0.5,,0.50,searching from lowest price,0.5,,0.5
|
||||||
|
239,0.5,,0.50,,0.5,,0.5
|
||||||
|
240,0,,0.00,,0,,0
|
||||||
|
241,1,,0.00,,0.5,,0.5
|
||||||
|
242,0,,0.00,,1,,0.333333333
|
||||||
|
243,0,,0.00,,0,,0
|
||||||
|
244,1,,1.00,,1,,1
|
||||||
|
245,0,,1.00,,1,,0.666666667
|
||||||
|
246,0,,0.00,,1,,0.333333333
|
||||||
|
247,0.5,,0.00,,1,,0.5
|
||||||
|
248,1,,1.00,,1,,1
|
||||||
|
249,0,,0.00,,0,,0
|
||||||
|
250,1,,1.00,,1,,1
|
||||||
|
251,1,,1.00,,1,,1
|
||||||
|
252,1,,1.00,,1,,1
|
||||||
|
253,0,website does not provide the information,0.50,,0.5,,0.333333333
|
||||||
|
254,1,,1.00,,1,,1
|
||||||
|
255,1,,1.00,,1,,1
|
||||||
|
256,1,,1.00,,1,,1
|
||||||
|
257,1,,1.00,,1,,1
|
||||||
|
258,1,,1.00,,1,,1
|
||||||
|
259,1,,1.00,,1,,1
|
||||||
|
260,0.5,,1.00,,1,,0.833333333
|
||||||
|
261,0,,1.00,,1,,0.666666667
|
||||||
|
262,0.5,,1.00,,1,,0.833333333
|
||||||
|
263,1,,1.00,,1,,1
|
||||||
|
264,1,,1.00,,1,,1
|
||||||
|
265,0,,0.00,,0,,0
|
||||||
|
266,0,,0.00,,0,,0
|
||||||
|
267,0,,0.00,,0,,0
|
||||||
|
268,1,,0.00,,1,,0.666666667
|
||||||
|
269,1,,0.00,,0.5,,0.5
|
||||||
|
270,1,,0.50,didn't show all,0,,0.5
|
||||||
|
271,0.5,,0.50,,1,,0.666666667
|
||||||
|
272,0.5,,0.50,,0.5,,0.5
|
||||||
|
273,0.5,,0.50,,1,,0.666666667
|
||||||
|
274,1,,0.00,,1,,0.666666667
|
||||||
|
275,1,,1.00,,1,,1
|
||||||
|
276,1,,1.00,,1,,1
|
||||||
|
277,1,,0.50,,1,,0.833333333
|
||||||
|
278,1,,0.50,,1,,0.833333333
|
||||||
|
279,1,,0.50,,1,,0.833333333
|
||||||
|
280,1,,0.50,,1,,0.833333333
|
||||||
|
281,0.5,,0.50,,1,,0.666666667
|
||||||
|
282,0.5,,0.50,,1,,0.666666667
|
||||||
|
283,1,,0.00,,1,,0.666666667
|
||||||
|
284,0.5,,1.00,didn't sort by price,1,,0.833333333
|
||||||
|
285,0.5,,1.00,,1,,0.833333333
|
||||||
|
286,0,,1.00,,1,,0.666666667
|
||||||
|
287,1,,1.00,,1,,1
|
||||||
|
288,0,,0.00,,0,,0
|
||||||
|
289,0,,0.00,,0,,0
|
||||||
|
290,0,,0.00,,0,,0
|
||||||
|
291,0,,0.00,,0,,0
|
||||||
|
292,0,,0.00,,0,,0
|
||||||
|
293,1,,1.00,,1,,1
|
||||||
|
294,1,,1.00,,1,,1
|
||||||
|
295,1,,1.00,,1,,1
|
||||||
|
296,1,,1.00,,0.5,,0.833333333
|
||||||
|
297,1,,0.00,,1,,0.666666667
|
||||||
|
298,0.5,,1.00,,1,,0.833333333
|
||||||
|
299,1,,1.00,,1,,1
|
||||||
|
300,0,,1.00,,1,,0.666666667
|
||||||
|
301,0,,0.00,,1,,0.333333333
|
||||||
|
302,0,,0.00,,0.5,,0.166666667
|
||||||
|
303,0.5,,1.00,,1,,0.833333333
|
||||||
|
304,1,,1.00,,1,,1
|
||||||
|
305,1,,1.00,,1,,1
|
||||||
|
306,0,,0.00,,0.5,,0.166666667
|
||||||
|
307,0,,0.00,,1,,0.333333333
|
||||||
|
308,1,,1.00,,1,,1
|
||||||
|
309,0,,0.00,similar tast but unable to replicate the steps did in last task,1,,0.333333333
|
||||||
|
310,1,,1.00,,1,,1
|
||||||
|
311,1,,1.00,,1,,1
|
||||||
|
312,1,,1.00,,1,,1
|
||||||
|
313,0,,0.00,no customer service number provided?,0.5,,0.166666667
|
||||||
|
314,1,,1.00,,1,,1
|
||||||
|
315,1,,1.00,,1,,1
|
||||||
|
316,1,,1.00,,1,,1
|
||||||
|
317,1,,1.00,,1,,1
|
||||||
|
318,1,,1.00,,1,,1
|
||||||
|
319,0.5,,1.00,,1,,0.833333333
|
||||||
|
320,1,,0.50,didn't list all,1,,0.833333333
|
||||||
|
321,0.5,,0.00,,0.5,,0.333333333
|
||||||
|
322,1,,1.00,,1,,1
|
||||||
|
323,0,,0.00,,1,,0.333333333
|
||||||
|
324,1,,1.00,,1,,1
|
||||||
|
325,1,,1.00,,1,,1
|
||||||
|
326,1,,1.00,,1,,1
|
||||||
|
327,1,,1.00,,1,,1
|
||||||
|
328,0.5,,0.50,,0.5,,0.5
|
||||||
|
329,1,,1.00,,1,,1
|
||||||
|
330,0.5,,0.50,didnt count all,1,,0.666666667
|
||||||
|
331,0.5,,1.00,,1,,0.833333333
|
||||||
|
332,0,,1.00,,0,,0.333333333
|
||||||
|
333,0.5,,0.00,,1,,0.5
|
||||||
|
334,0.5,,0.50,,1,,0.666666667
|
||||||
|
335,0.5,,0.50,,0,,0.333333333
|
||||||
|
336,0.5,,0.50,,1,,0.666666667
|
||||||
|
337,0.5,,0.50,,0.5,,0.5
|
||||||
|
338,1,,0.50,,1,,0.833333333
|
||||||
|
339,1,,1.00,,1,,1
|
||||||
|
340,0.5,,1.00,,1,,0.833333333
|
||||||
|
341,0,,1.00,,1,,0.666666667
|
||||||
|
342,1,,0.00,,1,,0.666666667
|
||||||
|
343,0,,0.00,,0,,0
|
||||||
|
344,0,,0.00,,1,,0.333333333
|
||||||
|
345,0,,0.00,,0,,0
|
||||||
|
346,1,,0.00,,1,,0.666666667
|
||||||
|
347,0,,0.00,,0,,0
|
||||||
|
348,1,,0.00,,1,,0.666666667
|
||||||
|
349,1,,1.00,,1,,1
|
||||||
|
350,1,,1.00,,1,,1
|
||||||
|
351,1,,1.00,,1,,1
|
||||||
|
352,1,,1.00,,1,,1
|
||||||
|
353,0.5,,0.50,,1,,0.666666667
|
||||||
|
354,1,,1.00,,1,,1
|
||||||
|
355,1,,0.00,,0,,0.333333333
|
||||||
|
356,0.5,,0.50,,0.5,,0.5
|
||||||
|
357,0.5,,1.00,,0,,0.5
|
||||||
|
358,1,,1.00,,1,,1
|
||||||
|
359,1,,1.00,,1,,1
|
||||||
|
360,1,,1.00,,1,,1
|
||||||
|
361,0.5,,1.00,,1,,0.833333333
|
||||||
|
362,1,,1.00,,0,,0.666666667
|
||||||
|
363,1,,1.00,,1,,1
|
||||||
|
364,1,,1.00,,1,,1
|
||||||
|
365,1,,1.00,,1,,1
|
||||||
|
366,0,,0.00,,0,,0
|
||||||
|
367,0,,0.00,,1,,0.333333333
|
||||||
|
368,0.5,,0.00,,1,,0.5
|
||||||
|
369,1,,0.00,is there any discount?,1,,0.666666667
|
||||||
|
370,1,,1.00,,1,,1
|
||||||
|
371,1,,1.00,,1,,1
|
||||||
|
372,1,,1.00,,1,,1
|
||||||
|
373,0.5,,1.00,,1,,0.833333333
|
||||||
|
374,1,q,0.50,correctly apply the theme but not able to preview,0.5,,0.666666667
|
||||||
|
375,1,,0.50,,0.5,,0.666666667
|
||||||
|
376,0,,1.00,,1,,0.666666667
|
||||||
|
377,0,,0.00,,0,,0
|
||||||
|
378,1,,0.00,,1,,0.666666667
|
||||||
|
379,1,,1.00,,1,,1
|
||||||
|
380,0,,0.00,,0,,0
|
||||||
|
381,1,,1.00,,1,,1
|
||||||
|
382,0,,0.00,,0,,0
|
||||||
|
383,1,,1.00,,1,,1
|
||||||
|
384,1,,1.00,,1,,1
|
||||||
|
385,1,,1.00,,1,,1
|
||||||
|
386,1,,1.00,,1,,1
|
||||||
|
387,1,,1.00,,1,,1
|
||||||
|
388,1,,1.00,,1,,1
|
||||||
|
389,1,,0.50,,0.5,,0.666666667
|
||||||
|
390,1,,0.50,,0.5,,0.666666667
|
||||||
|
391,1,,0.50,,1,,0.833333333
|
||||||
|
392,1,,0.50,,1,,0.833333333
|
||||||
|
393,1,,0.50,,0.5,,0.666666667
|
||||||
|
394,0,,1.00,,0,,0.333333333
|
||||||
|
395,1,,1.00,,1,,1
|
||||||
|
396,1,,1.00,,1,,1
|
||||||
|
397,1,,1.00,,1,,1
|
||||||
|
398,0,,1.00,,1,,0.666666667
|
||||||
|
399,1,,1.00,,1,,1
|
||||||
|
400,1,,1.00,,1,,1
|
||||||
|
401,0,,0.00,similar task but failed,0,,0
|
||||||
|
402,1,,1.00,,1,,1
|
||||||
|
403,1,,1.00,,1,,1
|
||||||
|
404,0,,0.00,didn't change the order to make sure the post is newest,1,,0.333333333
|
||||||
|
405,0,,0.00,,1,,0.333333333
|
||||||
|
406,0,,0.00,,1,,0.333333333
|
||||||
|
407,0.5,,0.00,,1,,0.5
|
||||||
|
408,0.5,,0.00,,1,,0.5
|
||||||
|
409,1,,1.00,,1,,1
|
||||||
|
410,1,,1.00,,1,,1
|
||||||
|
411,0,,0.00,successfylly nevigate to the right LICENSE but not able to upload the file,0,,0
|
||||||
|
412,0,,0.00,,0,,0
|
||||||
|
413,0,,0.00,,0,,0
|
||||||
|
414,1,,0.00,,1,,0.666666667
|
||||||
|
415,0.5,"ccompleted, but self-evaluation failed",0.50,did perform @ but the agent self cannot detect if the task is completed or not,0.5,,0.5
|
||||||
|
416,0.5,"ccompleted, but self-evaluation failed",0.00,-,0.5,,0.333333333
|
||||||
|
417,0,,0.00,,0,,0
|
||||||
|
418,1,,1.00,,1,,1
|
||||||
|
419,1,,1.00,,1,,1
|
||||||
|
420,1,,1.00,,1,,1
|
||||||
|
421,1,,1.00,,1,,1
|
||||||
|
422,1,,1.00,,1,,1
|
||||||
|
423,0,,0.00,,0,,0
|
||||||
|
424,1,,1.00,,1,,1
|
||||||
|
425,0,website does not provide the information,0.00,,0,,0
|
||||||
|
426,1,,0.00,,1,,0.666666667
|
||||||
|
427,0,website does not provide the information,0.00,,1,,0.333333333
|
||||||
|
428,0,start url error,0.00,,1,,0.333333333
|
||||||
|
429,1,,1.00,,1,,1
|
||||||
|
430,0,,0.00,,0,,0
|
||||||
|
431,0,,0.00,,0,,0
|
||||||
|
432,0,,1.00,,1,,0.666666667
|
||||||
|
433,0,,1.00,,1,,0.666666667
|
||||||
|
434,0.5,Click on the tablist failed,0.00,,1,,0.5
|
||||||
|
435,0,,0.00,,1,,0.333333333
|
||||||
|
436,0,,1.00,,1,,0.666666667
|
||||||
|
437,0.5,shopping cart,1.00,,1,,0.833333333
|
||||||
|
438,0,,0.00,,0,,0
|
||||||
|
439,0,,0.00,,0.5,,0.166666667
|
||||||
|
440,0,,0.00,,1,,0.333333333
|
||||||
|
441,0,,1.00,,1,,0.666666667
|
||||||
|
442,1,,0.00,,0,,0.333333333
|
||||||
|
443,0,,1.00,,1,,0.666666667
|
||||||
|
444,0,,1.00,,1,,0.666666667
|
||||||
|
445,0,,1.00,,1,,0.666666667
|
||||||
|
446,0.5,correct issue,0.00,,0.5,,0.333333333
|
||||||
|
447,0.5,correct issue,0.00,,0,,0.166666667
|
||||||
|
448,1,,1.00,,1,,1
|
||||||
|
449,1,,1.00,,1,,1
|
||||||
|
450,1,,1.00,,1,,1
|
||||||
|
451,1,,1.00,,0.5,,0.833333333
|
||||||
|
452,1,,1.00,,1,,1
|
||||||
|
453,1,,1.00,,1,,1
|
||||||
|
454,1,,1.00,,1,,1
|
||||||
|
455,1,,1.00,,1,,1
|
||||||
|
456,1,parachute?,1.00,,1,,1
|
||||||
|
457,0,twice,1.00,,1,,0.666666667
|
||||||
|
458,1,,1.00,,1,,1
|
||||||
|
459,1,,1.00,,1,,1
|
||||||
|
460,1,,1.00,,1,,1
|
||||||
|
461,1,,1.00,,1,,1
|
||||||
|
462,1,,1.00,,1,,1
|
||||||
|
463,1,,1.00,,1,,1
|
||||||
|
464,0,,0.00,,0,,0
|
||||||
|
465,1,,1.00,,1,,1
|
||||||
|
466,1,,1.00,,1,,1
|
||||||
|
467,1,,1.00,,1,,1
|
||||||
|
468,1,,1.00,,1,,1
|
||||||
|
469,1,,1.00,,1,,1
|
||||||
|
470,0,,0.00,,0.5,,0.166666667
|
||||||
|
471,0,,0.00,,0,,0
|
||||||
|
472,1,,1.00,,1,,1
|
||||||
|
473,1,,1.00,,1,,1
|
||||||
|
474,1,,1.00,,1,,1
|
||||||
|
475,1,,1.00,,1,,1
|
||||||
|
476,1,,1.00,,1,,1
|
||||||
|
477,1,,1.00,,1,,1
|
||||||
|
478,1,,1.00,,1,,1
|
||||||
|
479,1,,1.00,,0.5,,0.833333333
|
||||||
|
480,0,,0.00,,0,,0
|
||||||
|
481,0,,0.00,,0,,0
|
||||||
|
482,0,,0.00,,0,,0
|
||||||
|
483,0,,0.00,,0,,0
|
||||||
|
484,0,,0.00,,0,,0
|
||||||
|
485,0,,0.00,,0,,0
|
||||||
|
486,1,,1.00,,1,,1
|
||||||
|
487,1,,1.00,,1,,1
|
||||||
|
488,1,,1.00,,1,,1
|
||||||
|
489,1,,1.00,,1,,1
|
||||||
|
490,1,,1.00,,1,,1
|
||||||
|
491,0.5,sent email but not message,1.00,,1,,0.833333333
|
||||||
|
492,0.5,repeated process,0.00,,0.5,,0.333333333
|
||||||
|
493,0.5,sent email but not message,1.00,,1,,0.833333333
|
||||||
|
494,0.5,sent email but not message,0.00,,0.5,,0.333333333
|
||||||
|
495,1,,1.00,,1,,1
|
||||||
|
496,1,,1.00,,1,,1
|
||||||
|
497,1,,1.00,,0.5,,0.833333333
|
||||||
|
498,0.5,,0.00,,0,,0.166666667
|
||||||
|
499,1,,1.00,,1,,1
|
||||||
|
500,1,q,1.00,,1,,1
|
||||||
|
501,0.5,,1.00,,1,,0.833333333
|
||||||
|
502,0,,0.00,,1,,0.333333333
|
||||||
|
503,0.5,,1.00,,0.5,,0.666666667
|
||||||
|
504,0.5,,1.00,,0.5,,0.666666667
|
||||||
|
505,0,,1.00,,1,,0.666666667
|
||||||
|
506,0.5,,0.00,,1,,0.5
|
||||||
|
507,0.5,,0.00,,0,,0.166666667
|
||||||
|
508,1,,0.00,,1,,0.666666667
|
||||||
|
509,1,,0.00,,0.5,,0.5
|
||||||
|
510,0.5,,0.00,,0.5,,0.333333333
|
||||||
|
511,1,,0.00,didnt choose rating as order,1,,0.666666667
|
||||||
|
512,1,,1.00,,1,,1
|
||||||
|
513,1,,1.00,,1,,1
|
||||||
|
514,1,,1.00,,1,,1
|
||||||
|
515,1,,1.00,,1,,1
|
||||||
|
516,1,,1.00,,1,,1
|
||||||
|
517,1,,1.00,,1,,1
|
||||||
|
518,1,,1.00,,1,,1
|
||||||
|
519,1,,1.00,,1,,1
|
||||||
|
520,1,,1.00,,1,,1
|
||||||
|
521,1,,1.00,,1,,1
|
||||||
|
522,0,,0.00,,1,,0.333333333
|
||||||
|
523,0,,0.00,,0.5,,0.166666667
|
||||||
|
524,0.5,,0.00,,1,,0.5
|
||||||
|
525,0,,0.00,,1,,0.333333333
|
||||||
|
526,0.5,,0.00,,1,,0.5
|
||||||
|
527,1,,0.00,,1,,0.666666667
|
||||||
|
528,1,id,1.00,,1,,1
|
||||||
|
529,1,,1.00,,1,,1
|
||||||
|
530,1,,1.00,,1,,1
|
||||||
|
531,1,,1.00,,1,,1
|
||||||
|
532,1,,1.00,,1,,1
|
||||||
|
533,1,,1.00,,1,,1
|
||||||
|
534,1,,1.00,,1,,1
|
||||||
|
535,1,,1.00,,1,,1
|
||||||
|
536,1,,1.00,,1,,1
|
||||||
|
537,1,,1.00,,1,,1
|
||||||
|
538,1,,0.00,,1,,0.666666667
|
||||||
|
539,1,,1.00,,1,,1
|
||||||
|
540,1,,1.00,,1,,1
|
||||||
|
541,1,,1.00,,1,,1
|
||||||
|
542,1,,1.00,,1,,1
|
||||||
|
543,0,,0.00,,0.5,,0.166666667
|
||||||
|
544,0.5,,0.00,,0.5,,0.333333333
|
||||||
|
545,0,,0.00,,0.5,,0.166666667
|
||||||
|
546,0,,0.00,,0.5,,0.166666667
|
||||||
|
547,0.5,,0.00,,1,,0.5
|
||||||
|
548,0.5,"L, XL XS",1.00,,1,,0.833333333
|
||||||
|
549,0.5,,0.00,,0.5,,0.333333333
|
||||||
|
550,0.5,,0.00,,0.5,,0.333333333
|
||||||
|
551,1,,1.00,,1,,1
|
||||||
|
552,0.5,1 url ?,1.00,,1,,0.833333333
|
||||||
|
553,0.5,,0.00,,1,,0.5
|
||||||
|
554,0,,0.00,,1,,0.333333333
|
||||||
|
555,0,,0.00,,1,,0.333333333
|
||||||
|
556,0,,0.00,,0,,0
|
||||||
|
557,0,,0.00,,0,,0
|
||||||
|
558,0.5,,0.00,,0,,0.166666667
|
||||||
|
559,0,,0.00,,0,,0
|
||||||
|
560,0,,0.00,project url,0,,0
|
||||||
|
561,0,,0.00,,0.5,,0.166666667
|
||||||
|
562,0,,0.00,,0,,0
|
||||||
|
563,0,,0.00,,0.5,,0.166666667
|
||||||
|
564,0.5,,0.50,create the repo but not able to edit README,0,,0.333333333
|
||||||
|
565,0,,0.00,,0,,0
|
||||||
|
566,0,,0.00,,0,,0
|
||||||
|
567,0,,0.00,,0,,0
|
||||||
|
568,0,,0.00,,0,,0
|
||||||
|
569,0,,0.00,,0.5,,0.166666667
|
||||||
|
570,0,,0.00,,0.5,,0.166666667
|
||||||
|
571,1,,1.00,,1,,1
|
||||||
|
572,1,,1.00,,1,,1
|
||||||
|
573,1,,1.00,,1,,1
|
||||||
|
574,1,,1.00,,1,,1
|
||||||
|
575,1,,1.00,,1,,1
|
||||||
|
576,0,,0.00,,0,,0
|
||||||
|
577,0,,0.00,,0,,0
|
||||||
|
578,0,,0.00,,0,,0
|
||||||
|
579,0,,0.00,,0,,0
|
||||||
|
580,1,,1.00,,1,,1
|
||||||
|
581,1,,1.00,,1,,1
|
||||||
|
582,1,,1.00,,1,,1
|
||||||
|
583,1,,1.00,,1,,1
|
||||||
|
584,1,,1.00,,1,,1
|
||||||
|
585,0,,0.00,,0,,0
|
||||||
|
586,0,,0.00,search order one by one,0,,0
|
||||||
|
587,0,,0.00,,0,,0
|
||||||
|
588,0,,0.00,,0,,0
|
||||||
|
589,0,,0.00,,0,,0
|
||||||
|
590,1,,1.00,,0.5,,0.833333333
|
||||||
|
591,1,,1.00,,0.5,,0.833333333
|
||||||
|
592,1,,0.00,not able to set start date,0.5,,0.5
|
||||||
|
593,0,,0.00,,0.5,,0.166666667
|
||||||
|
594,1,minor error,0.00,not able to set due date,0.5,,0.5
|
||||||
|
595,0.5,,0.50,suscribed to the forum but didnt successfully open thread becase kept opening the pic so didn't subscribed the thread,1,?,0.666666667
|
||||||
|
596,1,,0.50,,1,?,0.833333333
|
||||||
|
597,1,,0.00,,1,?,0.666666667
|
||||||
|
598,1,,0.00,,1,?,0.666666667
|
||||||
|
599,1,,1.00,,1,?,1
|
||||||
|
600,1,,1.00,,1,,1
|
||||||
|
601,1,,1.00,,1,,1
|
||||||
|
602,1,,1.00,,1,,1
|
||||||
|
603,1,,1.00,,1,,1
|
||||||
|
604,1,,1.00,,1,,1
|
||||||
|
605,1,,1.00,,1,,1
|
||||||
|
606,1, ,1.00,,1,,1
|
||||||
|
607,1,,1.00,,1,,1
|
||||||
|
608,1,,1.00,,1,,1
|
||||||
|
609,1,,1.00,,1,,1
|
||||||
|
610,1,,1.00,,1,,1
|
||||||
|
611,1,,1.00,,1,,1
|
||||||
|
612,1,,1.00,,1,,1
|
||||||
|
613,1,,1.00,,1,,1
|
||||||
|
614,1,,1.00,,1,,1
|
||||||
|
615,0,,0.00,,0,,0
|
||||||
|
616,0,,0.50,,0,,0.166666667
|
||||||
|
617,0,,0.00,,0,,0
|
||||||
|
618,0,,0.00,,0,,0
|
||||||
|
619,0,,0.00,,0,,0
|
||||||
|
620,1,,1.00,,1,,1
|
||||||
|
621,1,,1.00,,1,,1
|
||||||
|
622,1,,1.00,,1,,1
|
||||||
|
623,1,,1.00,,1,,1
|
||||||
|
624,1,,1.00,,1,,1
|
||||||
|
625,1,,1.00,,1,,1
|
||||||
|
626,1,,1.00,,1,,1
|
||||||
|
627,0,,0.00,not able to enter title,0,,0
|
||||||
|
628,1,,1.00,,1,,1
|
||||||
|
629,1,,1.00,,1,,1
|
||||||
|
630,1,,1.00,,1,,1
|
||||||
|
631,1,,1.00,,1,,1
|
||||||
|
632,1,,1.00,,1,,1
|
||||||
|
633,1,,1.00,,1,,1
|
||||||
|
634,1,,1.00,,1,,1
|
||||||
|
635,1,,1.00,,1,,1
|
||||||
|
636,1,,1.00,,1,,1
|
||||||
|
637,1,,1.00,,1,,1
|
||||||
|
638,1,,1.00,,1,,1
|
||||||
|
639,1,,1.00,,1,,1
|
||||||
|
640,1,,1.00,,1,,1
|
||||||
|
641,1,,1.00,,1,,1
|
||||||
|
642,1,,1.00,,1,,1
|
||||||
|
643,1,,1.00,,1,,1
|
||||||
|
644,1,,1.00,,1,,1
|
||||||
|
645,1,,1.00,,1,,1
|
||||||
|
646,1,,1.00,,1,,1
|
||||||
|
647,1,,1.00,,1,,1
|
||||||
|
648,1,,1.00,,1,,1
|
||||||
|
649,1,,1.00,,1,,1
|
||||||
|
650,1,,1.00,,1,,1
|
||||||
|
651,1,,1.00,,1,,1
|
||||||
|
652,1,,1.00,,1,,1
|
||||||
|
653,1,,1.00,,1,,1
|
||||||
|
654,1,,1.00,,1,,1
|
||||||
|
655,1,,0.50,comment is weird,1,,0.833333333
|
||||||
|
656,1,,0.50,,1,,0.833333333
|
||||||
|
657,1,,0.50,,1,,0.833333333
|
||||||
|
658,0.5,,0.50,not able to scoll to 2030,0.5,,0.5
|
||||||
|
659,0.5,,0.50,,1,,0.666666667
|
||||||
|
660,0.5,,0.50,not able to assign to the person,1,,0.666666667
|
||||||
|
661,1,,1.00,,1,,1
|
||||||
|
662,1,,1.00,,1,,1
|
||||||
|
663,1,,1.00,,1,,1
|
||||||
|
664,1,,1.00,,1,,1
|
||||||
|
665,1,,1.00,,1,,1
|
||||||
|
666,0.5,,0.00,,1,,0.5
|
||||||
|
667,1,,1.00,,1,,1
|
||||||
|
668,0.5,,0.00,,1,,0.5
|
||||||
|
669,1,,1.00,,1,,1
|
||||||
|
670,1,,1.00,,1,,1
|
||||||
|
671,0.5,,0.00,didn't post in reddit,1,,0.5
|
||||||
|
672,0.5,,0.00,,1,,0.5
|
||||||
|
673,0.5,,0.00,,1,,0.5
|
||||||
|
674,0.5,,0.00,,1,,0.5
|
||||||
|
675,0.5,,0.00,,1,,0.5
|
||||||
|
676,1,?,1.00,,1,,1
|
||||||
|
677,1,,1.00,,1,,1
|
||||||
|
678,1,,1.00,,1,,1
|
||||||
|
679,1,,1.00,,1,,1
|
||||||
|
680,1,,1.00,,1,,1
|
||||||
|
681,0,,0.00,,0.5,,0.166666667
|
||||||
|
682,0,,0.00,,0.5,,0.166666667
|
||||||
|
683,0,,0.00,,0,,0
|
||||||
|
684,0,,0.00,,0,,0
|
||||||
|
685,0,,0.00,not able to related gitlab repo with reddit,1,,0.333333333
|
||||||
|
686,0,,0.00,,1,,0.333333333
|
||||||
|
687,0,,0.00,,1,,0.333333333
|
||||||
|
688,0,,0.00,,1,,0.333333333
|
||||||
|
689,1,,1.00,,1,,1
|
||||||
|
690,1,,1.00,,1,,1
|
||||||
|
691,1,,1.00,,1,,1
|
||||||
|
692,1,,1.00,,1,,1
|
||||||
|
693,1,,1.00,,1,,1
|
||||||
|
694,0.5,size,1.00,,1,,0.833333333
|
||||||
|
695,0.5,,1.00,,1,,0.833333333
|
||||||
|
696,0.5,,1.00,,0.5,,0.666666667
|
||||||
|
697,0.5,,1.00,,0.5,,0.666666667
|
||||||
|
698,0.5,,1.00,,1,,0.833333333
|
||||||
|
699,0.5,website ,0.00,,0,,0.166666667
|
||||||
|
700,0.5,,0.00,not able to stop within limited steps,0.5,,0.333333333
|
||||||
|
701,0.5,,1.00,,1,,0.833333333
|
||||||
|
702,0.5,,0.00,,0,,0.166666667
|
||||||
|
703,0,,0.50,not able to update ,0.5,,0.333333333
|
||||||
|
704,1,,0.50,,0.5,,0.666666667
|
||||||
|
705,1,,1.00,,1,,1
|
||||||
|
706,1,,0.00,,1,,0.666666667
|
||||||
|
707,1,,1.00,,1,,1
|
||||||
|
708,0.5,,0.00,,1,,0.5
|
||||||
|
709,1,successfully shows the results,0.00,,0.5,,0.5
|
||||||
|
710,1,,0.50,,0.5,,0.666666667
|
||||||
|
711,1,looks right,0.00,,0.5,,0.5
|
||||||
|
712,1,,0.00,,1,,0.666666667
|
||||||
|
713,1,,1.00,,1,,1
|
||||||
|
714,1,,1.00,,1,,1
|
||||||
|
715,0,,1.00,,0,,0.333333333
|
||||||
|
716,0.5,,1.00,,1,,0.833333333
|
||||||
|
717,0.5,should be r/movie,1.00,,1,,0.833333333
|
||||||
|
718,0.5,,0.00,,0.5,,0.333333333
|
||||||
|
719,0.5,,0.00,,1,,0.5
|
||||||
|
720,1,,0.00,,0.5,,0.5
|
||||||
|
721,1,,1.00,,0.5,,0.833333333
|
||||||
|
722,0.5,,1.00,,1,,0.833333333
|
||||||
|
723,0,,1.00,,0,,0.333333333
|
||||||
|
724,1,,0.00,,0,,0.333333333
|
||||||
|
725,1,,0.00,,0,,0.333333333
|
||||||
|
726,1,,0.00,,0,,0.333333333
|
||||||
|
727,1,,0.00,,0.5,,0.5
|
||||||
|
728,0,,0.00,,0,,0
|
||||||
|
729,1,,0.00,,0,,0.333333333
|
||||||
|
730,0.5,,0.00,,0,,0.166666667
|
||||||
|
731,0,,1.00,,1,,0.666666667
|
||||||
|
732,0,,1.00,,1,,0.666666667
|
||||||
|
733,0,,1.00,,1,,0.666666667
|
||||||
|
734,0,,1.00,,1,,0.666666667
|
||||||
|
735,0,,1.00,,1,,0.666666667
|
||||||
|
736,1,,1.00,,1,,1
|
||||||
|
737,1,,1.00,,1,,1
|
||||||
|
738,1,,1.00,,1,,1
|
||||||
|
739,1,,1.00,,1,,1
|
||||||
|
740,1,,1.00,,1,,1
|
||||||
|
741,1,,1.00,,0,,0.666666667
|
||||||
|
742,0.5,,0.00,,0,,0.166666667
|
||||||
|
743,0.5,,0.00,,0,,0.166666667
|
||||||
|
744,0.5,case sensitive,0.00,,0.5,,0.333333333
|
||||||
|
745,0.5,,0.00,,0,,0.166666667
|
||||||
|
746,0.5,,0.00,,0,,0.166666667
|
||||||
|
747,0,,0.00,,0,,0
|
||||||
|
748,0.5,,0.00,,0,,0.166666667
|
||||||
|
749,0,,0.00,,0,,0
|
||||||
|
750,0,,0.00,,0,,0
|
||||||
|
751,0,,0.00,,0,,0
|
||||||
|
752,0.5,,0.00,,0,,0.166666667
|
||||||
|
753,0,,0.00,,1,,0.333333333
|
||||||
|
754,0,,0.00,,1,,0.333333333
|
||||||
|
755,0,,1.00,,1,,0.666666667
|
||||||
|
756,0.5,,1.00,,1,,0.833333333
|
||||||
|
757,1,,1.00,,1,,1
|
||||||
|
758,1,,1.00,,1,,1
|
||||||
|
759,1,No enough information,1.00,,1,,1
|
||||||
|
760,1,,0.00,,0,,0.333333333
|
||||||
|
761,0,q,0.00,,0,,0
|
||||||
|
762,1,,0.00,,1,,0.666666667
|
||||||
|
763,1,,0.00,,1,,0.666666667
|
||||||
|
764,0,q,0.00,,0,,0
|
||||||
|
765,0,,0.00,,0,,0
|
||||||
|
766,0,,0.00,,0,,0
|
||||||
|
767,0,,0.00,,0,,0
|
||||||
|
768,0.5,,0.00,,1,,0.5
|
||||||
|
769,0.5,,0.00,didn't add on top of the original quantity,0,,0.166666667
|
||||||
|
770,1,,1.00,,1,,1
|
||||||
|
771,1,,1.00,,1,,1
|
||||||
|
772,0,,1.00,,1,,0.666666667
|
||||||
|
773,0,,0.00,,1,,0.333333333
|
||||||
|
774,0,,0.00,,0.5,,0.166666667
|
||||||
|
775,0,,0.00,,0,,0
|
||||||
|
776,0,,0.00,,0,,0
|
||||||
|
777,0.5,,0.00,,0,,0.166666667
|
||||||
|
778,1,,1.00,,1,,1
|
||||||
|
779,0,,1.00,,1,,0.666666667
|
||||||
|
780,1,,1.00,,1,,1
|
||||||
|
781,0,,0.00,,0,,0
|
||||||
|
782,0,,1.00,,1,,0.666666667
|
||||||
|
783,0,,0.00,,0,,0
|
||||||
|
784,1,,1.00,,1,,1
|
||||||
|
785,1,,1.00,,1,,1
|
||||||
|
786,1,,1.00,,1,,1
|
||||||
|
787,1,,1.00,,1,,1
|
||||||
|
788,1,,1.00,,1,,1
|
||||||
|
789,0,,0.00,,0.5,,0.166666667
|
||||||
|
790,0,,1.00,,0.5,,0.5
|
||||||
|
791,0,,1.00,,1,,0.666666667
|
||||||
|
792,0,,0.00,,1,,0.333333333
|
||||||
|
793,0,,1.00,,1,,0.666666667
|
||||||
|
794,0,,0.00,,0,,0
|
||||||
|
795,1,,1.00,,0.5,,0.833333333
|
||||||
|
796,1,,1.00,,0.5,,0.833333333
|
||||||
|
797,1,,1.00,,0.5,,0.833333333
|
||||||
|
798,0,,0.00,,0.5,,0.166666667
|
||||||
|
799,0.5,,0.00,,0,,0.166666667
|
||||||
|
800,0.5,,0.00,,0,,0.166666667
|
||||||
|
801,0.5,,0.00,,0.5,,0.333333333
|
||||||
|
802,0.5,,0.00,,0,,0.166666667
|
||||||
|
803,0.5,,0.50,,0,,0.333333333
|
||||||
|
804,0.5,,0.00,,0.5,,0.333333333
|
||||||
|
805,0.5,,0.00,,1,,0.5
|
||||||
|
806,1,,1.00,,1,,1
|
||||||
|
807,1,,0.00,,0,,0.333333333
|
||||||
|
808,1,,1.00,,1,,1
|
||||||
|
809,1,,1.00,,1,,1
|
||||||
|
810,0,,1.00,,1,,0.666666667
|
||||||
|
811,1,,1.00,,0,,0.666666667
|
||||||
|
Win Rate,0.619458128,,0.591748768,,0.740763547,,0.650656814
|
|
BIN
files/human_assessment/WebVoyager-AgentOccam-Alice.xlsx
Normal file
BIN
files/human_assessment/WebVoyager-AgentOccam-Alice.xlsx
Normal file
Binary file not shown.
BIN
files/human_assessment/WebVoyager-AgentOccam-Bob.xlsx
Normal file
BIN
files/human_assessment/WebVoyager-AgentOccam-Bob.xlsx
Normal file
Binary file not shown.
BIN
files/human_assessment/WebVoyager-AgentOccam-Colin.xlsx
Normal file
BIN
files/human_assessment/WebVoyager-AgentOccam-Colin.xlsx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user