update
This commit is contained in:
parent
4b8dbfd9c5
commit
c078ba6292
|
@ -70,7 +70,7 @@ env:
|
|||
prune: true
|
||||
max_browser_rows: 500
|
||||
headless: True
|
||||
task_ids: ["stanford_cs_head", 65]
|
||||
task_ids: ["Allrecipes--3", 65]
|
||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||
|
|
|
@ -18,7 +18,7 @@ env:
|
|||
max_env_steps: 20
|
||||
max_browser_rows: 500
|
||||
headless: True
|
||||
task_ids: ["stanford_cs_head", 65]
|
||||
task_ids: ["Allrecipes--3", 65]
|
||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||
|
|
|
@ -70,7 +70,7 @@ env:
|
|||
prune: true
|
||||
max_browser_rows: 500
|
||||
headless: True
|
||||
task_ids: ["stanford_cs_head", 65]
|
||||
task_ids: ["Allrecipes--3", 65]
|
||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||
|
|
|
@ -18,7 +18,7 @@ env:
|
|||
max_env_steps: 20
|
||||
max_browser_rows: 500
|
||||
headless: True
|
||||
task_ids: ["stanford_cs_head", 65]
|
||||
task_ids: ["Allrecipes--3", 65]
|
||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||
|
|
|
@ -70,7 +70,7 @@ env:
|
|||
prune: true
|
||||
max_browser_rows: 500
|
||||
headless: True
|
||||
task_ids: ["stanford_cs_head", 65]
|
||||
task_ids: ["Allrecipes--3", 65]
|
||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||
|
|
|
@ -70,7 +70,7 @@ env:
|
|||
prune: true
|
||||
max_browser_rows: 500
|
||||
headless: True
|
||||
task_ids: ["stanford_cs_head", 65]
|
||||
task_ids: ["Allrecipes--3", 65]
|
||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||
|
|
|
@ -70,7 +70,7 @@ env:
|
|||
prune: false
|
||||
max_browser_rows: 500
|
||||
headless: True
|
||||
task_ids: ["stanford_cs_head", 65]
|
||||
task_ids: ["Allrecipes--3", 65]
|
||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||
|
|
|
@ -70,7 +70,7 @@ env:
|
|||
prune: false
|
||||
max_browser_rows: 500
|
||||
headless: True
|
||||
task_ids: ["stanford_cs_head", 65]
|
||||
task_ids: ["Allrecipes--3", 65]
|
||||
# a. "SHOPPING_ADMIN": [0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 41, 42, 43, 62, 63, 64, 65, 77, 78, 79, 94, 95, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 127, 128, 129, 130, 131, 157, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 243, 244, 245, 246, 247, 288, 289, 290, 291, 292, 344, 345, 346, 347, 348, 374, 375, 423, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 470, 471, 472, 473, 474, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 676, 677, 678, 679, 680, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 790]
|
||||
# b. "MAP": [7, 8, 9, 10, 16, 17, 18, 19, 20, 32, 33, 34, 35, 36, 37, 38, 39, 40, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 97, 98, 99, 100, 101, 137, 138, 139, 140, 151, 152, 153, 154, 155, 218, 219, 220, 221, 222, 223, 224, 236, 237, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 265, 266, 267, 268, 287, 356, 363, 364, 365, 366, 367, 369, 370, 371, 372, 373, 377, 378, 379, 380, 381, 382, 383, 424, 425, 426, 427, 428, 429, 430, 737, 738, 739, 740, 741, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767]
|
||||
# c. "SHOPPING": [21, 22, 23, 24, 25, 26, 47, 48, 49, 50, 51, 96, 117, 118, 124, 125, 126, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 188, 189, 190, 191, 192, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 238, 239, 240, 241, 242, 260, 261, 262, 263, 264, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 298, 299, 300, 301, 302, 313, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 358, 359, 360, 361, 362, 368, 376, 384, 385, 386, 387, 388, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 465, 466, 467, 468, 469, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 571, 572, 573, 574, 575, 585, 586, 587, 588, 589, 653, 654, 655, 656, 657, 671, 672, 673, 674, 675, 689, 690, 691, 692, 693, 792, 793, 794, 795, 796, 797, 798]
|
||||
|
|
31
README.md
31
README.md
|
@ -1,17 +1,15 @@
|
|||
# AgentOccam
|
||||
Code for "[AgentOccam: A Simple Yet Strong Baseline for LLM-Based Web Agents]()".
|
||||
Code for "[AgentOccam: A Simple Yet Strong Baseline for LLM-Based Web Agents](https://arxiv.org/abs/2410.13825)" (ICLR 2025).
|
||||
|
||||

|
||||
|
||||
We work on automating web tasks! 🏄🏄🏄 We refine the LLM-based web agents by aligning their observation and action space with the capabilities of LLMs.
|
||||
AgentOccam offers a simple but strong baseline for LLM-based web agents. By providing a URL and the task you want it to perform, AgentOccam can execute it for you. Its simplicity and effectiveness allow you to run it directly, or adapt it into a larger pipeline for executing web tasks, such as web information retrieval before processing the documents.
|
||||
|
||||
The newly designed agent AgentOccam surpasses previous state-of-the-art methods and concurrent work significantly w/o in-context examples, new agent roles, online feedback or search strategies on [WebArena](https://webarena.dev), a benchmark featuring general-purpose web tasks. 🍺
|
||||
**Without using in-context examples, new agent roles, online feedback, or search strategies**, AgentOccam demonstrates impressive performance on tasks in WebArena (a web simulator benchmark with tasks from sites like shopping, shopping admin, GitLab, Reddit, map, etc.) and tasks with golden answers in WebVoyager (a benchmark based on real web tasks), once surpassing the SOTA on both leaderboards.
|
||||
|
||||
We shed light on LLMs' impressive zero-shot performance on web tasks, and the critical role of carefully tuning observation and action spaces for LLM-based agents. 🧙
|
||||
In brief, our approach aligns the input (webpage descriptions, i.e., agent observations) and output (action strings that can be translated into web interactions, i.e., agent actions) of web tasks with the tasks that LLMs are most familiar with, such as reading comprehension and question-answering. We refer to our approach as **agent observation and action space alignment**, shedding light on LLMs' impressive zero-shot performance on web tasks, and **the critical role of carefully tuning observation and action spaces for LLM-based agents**.
|
||||
|
||||
You can let AgentOccam interact with other websites like Google per your requests by defining the task config files, as seen in the example in `config_files/tasks/standford_cs_head.json`. Have fun playing with it! :)
|
||||
|
||||
*Please check whether reddit post exceeds limits, login expires, or any other webarena simulator/website failure exists when you finish one round. You should restart the simluator/relogin to the websites and rerun those tasks before reporting your final success rate. Additionally, LLM policy varies even given the same task as the generation temperature is set to >0 for more diverse exploration. Therefore, it is expected that you can get difference traces when starting the same task multiple times. Try it out with the basic `config_files/tasks/standford_cs_head.json`!*
|
||||
You can let AgentOccam interact with other websites by defining task config files, as seen in the example in `config_files/tasks/Allrecipes--3.json`. Have fun playing with it! :)
|
||||
|
||||
## WebArena Replication
|
||||
### Environment Setup
|
||||
|
@ -31,7 +29,7 @@ mkdir .auth
|
|||
```
|
||||
|
||||
### Experiments
|
||||
#### AgentOccam-Series and SteP-Replication
|
||||
#### AgentOccam-Series and SteP-Replication (Please refer to SteP's official repo for their latest agent code.)
|
||||
* Connect to the WebArena host server.
|
||||
* Export the env configs:
|
||||
```bash
|
||||
|
@ -43,7 +41,7 @@ export MAP="http://<webarena_server_address>:3000"
|
|||
export WIKIPEDIA="http://<webarena_server_address>:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
|
||||
export HOMEPAGE="http://<webarena_server_address>:4399"
|
||||
export OPENAI_API_KEY="<openai_api_key>"
|
||||
export GEMINI_API_KEY="<gemini_api_key>"
|
||||
export GEMINI_API_KEY="<gemini_api_key>" # Optional, we provide several other agent base models, such as Claude and LLaMa.
|
||||
```
|
||||
* Log in:
|
||||
```bash
|
||||
|
@ -54,10 +52,16 @@ python browser_env/auto_login.py
|
|||
python eval_webarena.py --config AgentOccam/configs/AgentOccam.yml # Replace the yml config with your target one.
|
||||
```
|
||||
*You can directly run `bash script/run_config.sh` after replacing the experiment configurations.*
|
||||
#### WebArena-Replication
|
||||
|
||||
*Please check whether reddit post exceeds limits, login expires, or any other webarena simulator/website failure exists when you finish one round at WebArena. Additionally, LLM policy varies even given the same task, as the generation temperature is set to >0 for more diverse exploration. Therefore, it is expected that you can get different traces when starting the same task multiple times. Try it out with the basic `config_files/tasks/Allrecipes--3.json`.*
|
||||
#### WebArena-Agent (Please refer to WebArena's official repo for their latest agent code.)
|
||||
```bash
|
||||
bash scripts/run_webarena.sh
|
||||
```
|
||||
### Trajectories
|
||||
Placed at [this link](https://drive.google.com/drive/folders/1MjnDIlfPGPjMFszirQO46fdP4LkH6669?usp=sharing).
|
||||
### Human Assessment
|
||||
Placed at `files/human_assessment/WebArena-AgentOccam.csv`.
|
||||
|
||||
## WebVoyager Replication
|
||||
### Environment Setup
|
||||
|
@ -82,11 +86,14 @@ cd ../AgentOccam
|
|||
```bash
|
||||
python eval_webarena.py --config AgentOccam/configs/AgentOccam-WebVoyager.yml
|
||||
```
|
||||
#### Agent-E
|
||||
#### Agent-E (Please refer to Agent-E's official repo for their latest agent code.)
|
||||
```bash
|
||||
python -m agente_replication --task_ids Allrecipes--3
|
||||
```
|
||||
|
||||
### Trajectories
|
||||
Placed at [this link](https://drive.google.com/drive/folders/1d5xPdOhYWjDTJqLmehdXs2AbtWCc4SXB?usp=sharing).
|
||||
### Human Assessment
|
||||
Placed at `files/human_assessment/WebVoyager-AgentOccam_*.csv`.
|
||||
|
||||
|
||||
## Agent Configuration Explanation
|
||||
|
|
20
config_files/tasks/Allrecipes--3.json
Normal file
20
config_files/tasks/Allrecipes--3.json
Normal file
|
@ -0,0 +1,20 @@
|
|||
{
|
||||
"sites": [
|
||||
"allrecipes"
|
||||
],
|
||||
"task_id": "Allrecipes--3",
|
||||
"require_login": false,
|
||||
"storage_state": null,
|
||||
"start_url": "https://www.allrecipes.com/",
|
||||
"geolocation": null,
|
||||
"intent": "Locate a recipe for vegan chocolate chip cookies with over 60 reviews and a rating of at least 4.5 stars on Allrecipes.",
|
||||
"require_reset": false,
|
||||
"eval": {
|
||||
"eval_types": [
|
||||
"string_match"
|
||||
],
|
||||
"reference_answers": {
|
||||
"fuzzy_match": "'Vegan Chocolate Chip, Oatmeal, and Nut Cookies', 4.9 star, 67 viewers (> 60)"
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
{
|
||||
"sites": [
|
||||
"google"
|
||||
],
|
||||
"task_id": "stanford_cs_head",
|
||||
"require_login": false,
|
||||
"storage_state": null,
|
||||
"start_url": "https://www.google.com/",
|
||||
"geolocation": null,
|
||||
"intent": "What is the email address of the Dean of the School of Engineering at Stanford University?",
|
||||
"require_reset": false,
|
||||
"eval": {
|
||||
"eval_types": [
|
||||
"string_match"
|
||||
],
|
||||
"reference_answers": {
|
||||
"exact_match": "widom@stanford.edu"
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
{
|
||||
"sites": [
|
||||
"google"
|
||||
],
|
||||
"task_id": "tsinghua_da_head",
|
||||
"require_login": false,
|
||||
"storage_state": null,
|
||||
"start_url": "https://www.google.com/",
|
||||
"geolocation": null,
|
||||
"intent": "帮我查一下清华大学自动化系系主任的邮箱",
|
||||
"require_reset": false,
|
||||
"eval": {
|
||||
"eval_types": [
|
||||
"string_match"
|
||||
],
|
||||
"reference_answers": {
|
||||
"exact_match": "taozhang@tsinghua.edu.cn"
|
||||
}
|
||||
}
|
||||
}
|
|
@ -3809,7 +3809,8 @@
|
|||
"locator": "",
|
||||
"required_contents": {
|
||||
"must_include": [
|
||||
"jaw bruxism",
|
||||
"jaw",
|
||||
"bruxism",
|
||||
"mouth guard"
|
||||
]
|
||||
}
|
||||
|
@ -28661,7 +28662,7 @@
|
|||
],
|
||||
"reference_answers": {
|
||||
"must_include": [
|
||||
"412"
|
||||
"414"
|
||||
]
|
||||
},
|
||||
"reference_url": "",
|
||||
|
|
814
files/human_assessment/WebArena-AgentOccam.csv
Normal file
814
files/human_assessment/WebArena-AgentOccam.csv
Normal file
|
@ -0,0 +1,814 @@
|
|||
Task ID,Alice (Correct: 1; Partially Correct: 0.5; Incorrect: 0),Alice's Note,Bob,Bob's Note,Colin,Colin's Note,Average
|
||||
0,0.5,,1.00,,1,,0.833333333
|
||||
1,1,,1.00,,1,,1
|
||||
2,0.5,,0.50,product type refers to the general category not the products,1,,0.666666667
|
||||
3,1,,1.00,,1,,1
|
||||
4,1,,1.00,,1,,1
|
||||
5,1,,0.50,product type is different from product,1,,0.833333333
|
||||
6,1,,1.00,,1,"Uncertainty exists for 0-6, as the query time cannot be determined.",1
|
||||
7,0,,0.00,"failed to click ""Go"" because the FROM field wasn't actually filled",0,,0
|
||||
8,1,,0.50,identified there's no airport within 5km but incorrectly identified the nearest international airport probably due to the OpenStreetMap API search function,0,,0.5
|
||||
9,1,,1.00,,0,,0.666666667
|
||||
10,0,,0.50,identify the limitation of the openstreet api but didn't give correct answer for this task,0,,0.166666667
|
||||
11,1,,0.00,didn't type and search in the correct field,1,,0.666666667
|
||||
12,0,,0.00,didn't go to the correct tab,0,,0
|
||||
13,1,,1.00,,1,,1
|
||||
14,0,,0.00,didn't type and search in the correct field,0,,0
|
||||
15,1,,1.00,,1,,1
|
||||
16,1,,1.00,,1,,1
|
||||
17,1,,1.00,,1,,1
|
||||
18,1,,1.00,,1,,1
|
||||
19,1,,1.00,,1,,1
|
||||
20,1,,1.00,,1,,1
|
||||
21,1,,1.00,,0.5,,0.833333333
|
||||
22,1,,1.00,,0.5,,0.833333333
|
||||
23,1,,1.00,,0.5,,0.833333333
|
||||
24,1,,1.00,,1,,1
|
||||
25,1,,0.50,can't detect implicitly mentioned related to average print quality,1,,0.833333333
|
||||
26,1,,0.00,failed to identify customer service complaints,1,,0.666666667
|
||||
27,0,,0.00,,0,,0
|
||||
28,0,q,0.00,didn't quite perform the task related to that forum,1,,0.333333333
|
||||
29,0,q,0.00,,0,,0
|
||||
30,0,error,0.00,,0,,0
|
||||
31,0,,0.00,,0,,0
|
||||
32,1,,1.00,,1,,1
|
||||
33,0,error,0.00,tried to go to Google Map but failed,0,,0
|
||||
34,0,,0.00,,0,,0
|
||||
35,0,,0.00,,0,,0
|
||||
36,1,,1.00,,1,,1
|
||||
37,0,,0.00,,0,,0
|
||||
38,1,,1.00,,1,,1
|
||||
39,0,,0.50,openstreet api,0,,0.166666667
|
||||
40,0,,0.50,,0,,0.166666667
|
||||
41,1,,1.00,,0,,0.666666667
|
||||
42,1,,1.00,,1,,1
|
||||
43,0,,1.00,since nike has no result in the store,1,,0.666666667
|
||||
44,1,,1.00,,0.5,,0.833333333
|
||||
45,1,,1.00,,1,,1
|
||||
46,1,,1.00,,0.5,,0.833333333
|
||||
47,0,,1.00,,1,,0.666666667
|
||||
48,1,,1.00,,1,,1
|
||||
49,0,,1.00,,1,,0.666666667
|
||||
50,0,,0.00,should be more than 16,1,,0.333333333
|
||||
51,0,,0.50,included pending orders,1,,0.5
|
||||
52,1,,1.00,,1,,1
|
||||
53,1,,1.00,,1,,1
|
||||
54,1,,1.00,,1,,1
|
||||
55,0,,0.50,,0,,0.166666667
|
||||
56,0,,0.50,,0.5,,0.333333333
|
||||
57,1,,1.00,,1,,1
|
||||
58,0,,1.00,,0,,0.333333333
|
||||
59,0,,1.00,,0,,0.333333333
|
||||
60,0,,1.00,,0,,0.333333333
|
||||
61,0,,1.00,,0,,0.333333333
|
||||
62,1,,1.00,,1,,1
|
||||
63,1,,1.00,,1,,1
|
||||
64,0,,0.00,,0.5,,0.166666667
|
||||
65,0,,0.00,,0,,0
|
||||
66,1,,0.50,didnt identify all,1,,0.833333333
|
||||
67,1,,0.50,,1,,0.833333333
|
||||
68,1,,0.50,,0.5,,0.666666667
|
||||
69,1,,1.00,,1,,1
|
||||
70,1,,1.00,,1,,1
|
||||
71,1,,1.00,,1,,1
|
||||
72,0.5,,0.00,,1,,0.5
|
||||
73,1,,1.00,,1,,1
|
||||
74,0.5,,0.00,wrong order,1,,0.5
|
||||
75,0.5,,0.00,,0.5,,0.333333333
|
||||
76,0.5,,1.00,,0.5,,0.666666667
|
||||
77,1,,0.00,,1,,0.666666667
|
||||
78,0,,0.00,,1,,0.333333333
|
||||
79,0,,0.00,,1,,0.333333333
|
||||
80,0,,0.00,Starbuck on Craig St?,1,,0.333333333
|
||||
81,0,,0.00,,0,,0
|
||||
82,1,,1.00,,1,,1
|
||||
83,0,,0.00,,0,,0
|
||||
84,1,,1.00,,1,,1
|
||||
85,1,,1.00,use airport to estimate,1,,1
|
||||
86,1,,1.00,,1,,1
|
||||
87,1,,1.00,,1,,1
|
||||
88,1,,1.00,,1,,1
|
||||
89,0,,0.00,unable to access Wiki,0,,0
|
||||
90,0.5,,1.00,,1,,0.833333333
|
||||
91,0.5,,1.00,,1,,0.833333333
|
||||
92,0.5,,1.00,,1,,0.833333333
|
||||
93,0,,1.00,,1,,0.666666667
|
||||
94,1,,1.00,,1,,1
|
||||
95,1,,1.00,,1,,1
|
||||
96,1,,1.00,,0.5,,0.833333333
|
||||
97,1,,1.00,,1,,1
|
||||
98,1,,0.50,seems there are nearer?,1,,0.833333333
|
||||
99,0,,0.00,,0,,0
|
||||
100,0,,0.00,,0,,0
|
||||
101,0,,0.00,,0,,0
|
||||
102,0.5,,0.00,help needed / help wanted,0,,0.166666667
|
||||
103,0,,0.00,,0,,0
|
||||
104,0,,0.00,,0,,0
|
||||
105,0,,0.00,,0.5,,0.166666667
|
||||
106,0,,0.00,,0,,0
|
||||
107,0.5,,0.00,,1,,0.5
|
||||
108,0.5,,0.00,,1,,0.5
|
||||
109,0.5,,0.00,,0.5,,0.333333333
|
||||
110,0.5,,0.00,,0,,0.166666667
|
||||
111,0.5,,0.50,didnt count right,1,,0.666666667
|
||||
112,0,,0.00,,0.5,,0.166666667
|
||||
113,0,,0.00,,0.5,,0.166666667
|
||||
114,0,,0.00,,0.5,,0.166666667
|
||||
115,0,,0.00,,0.5,,0.166666667
|
||||
116,0.5,,0.00,,0.5,,0.333333333
|
||||
117,0,,0.00,,0,,0
|
||||
118,1,,1.00,,1,,1
|
||||
119,1,,1.00,,1,,1
|
||||
120,1,,1.00,,1,,1
|
||||
121,1,,0.50,also included why people dont like it,1,,0.833333333
|
||||
122,1,,1.00,,1,,1
|
||||
123,0,,0.00,,1,,0.333333333
|
||||
124,0,,0.00,only examined one page,0.5,,0.166666667
|
||||
125,0,,0.00,,1,,0.333333333
|
||||
126,0,,0.00,,0.5,,0.166666667
|
||||
127,1,,1.00,,1,,1
|
||||
128,1,,1.00,,1,,1
|
||||
129,1,,1.00,,1,,1
|
||||
130,1,,1.00,,1,,1
|
||||
131,0.5,,0.50,didn't finish,1,,0.666666667
|
||||
132,1,,1.00,,1,,1
|
||||
133,1,,1.00,,1,,1
|
||||
134,1,,1.00,,1,,1
|
||||
135,1,,1.00,repetition of action,1,,1
|
||||
136,1,,0.00,,1,,0.666666667
|
||||
137,1,,1.00,,1,,1
|
||||
138,1,,1.00,,1,,1
|
||||
139,1,,1.00,,1,,1
|
||||
140,1,,1.00,,1,,1
|
||||
141,0.5,,0.50,tried to look into one transaction type,1,,0.666666667
|
||||
142,0.5,,0.00,,1,,0.5
|
||||
143,0.5,,0.00,,1,,0.5
|
||||
144,0.5,,0.00,,1,,0.5
|
||||
145,0.5,,0.00,,1,,0.5
|
||||
146,1,,1.00,,1,,1
|
||||
147,0,,0.50,,0,,0.166666667
|
||||
148,0,,1.00,,1,,0.666666667
|
||||
149,0.5,,1.00,,1,,0.833333333
|
||||
150,1,,1.00,,1,,1
|
||||
151,1,,1.00,,1,,1
|
||||
152,1,,1.00,,1,,1
|
||||
153,1,,1.00,,1,,1
|
||||
154,1,,1.00,,1,,1
|
||||
155,1,,1.00,,1,,1
|
||||
156,1,,1.00,,1,,1
|
||||
157,1,,1.00,,1,,1
|
||||
158,1,,1.00,,1,,1
|
||||
159,1,,1.00,,1,,1
|
||||
160,1,,1.00,,1,,1
|
||||
161,1,,1.00,,1,,1
|
||||
162,0.5,,1.00,,0,,0.5
|
||||
163,1,,0.00,,1,,0.666666667
|
||||
164,1,,1.00,,1,,1
|
||||
165,1,,1.00,,1,,1
|
||||
166,1,,1.00,,1,,1
|
||||
167,1,,1.00,,1,,1
|
||||
168,0,,0.00,,1,,0.333333333
|
||||
169,0,,1.00,,1,,0.666666667
|
||||
170,0.5,,1.00,,1,,0.833333333
|
||||
171,0,,0.00,,0,,0
|
||||
172,0,,1.00,,1,,0.666666667
|
||||
173,1,,1.00,,1,,1
|
||||
174,0.5,it only looks for open issues instead of all,1.00,,1,,0.833333333
|
||||
175,0.5,,1.00,,1,,0.833333333
|
||||
176,0.5,,1.00,,1,,0.833333333
|
||||
177,0.5,,1.00,,1,,0.833333333
|
||||
178,0.5,,1.00,,1,,0.833333333
|
||||
179,0.5,,1.00,,1,,0.833333333
|
||||
180,0.5,,1.00,,1,,0.833333333
|
||||
181,0.5,,1.00,,1,,0.833333333
|
||||
182,0.5,,1.00,,1,,0.833333333
|
||||
183,1,,1.00,,1,,1
|
||||
184,0,Cookies error,0.50,,1,,0.5
|
||||
185,1,,1.00,,1,,1
|
||||
186,0.5,,0.00,,1,,0.5
|
||||
187,0.5,,0.00,,1,,0.5
|
||||
188,1,,1.00,,1,,1
|
||||
189,1,,1.00,,1,,1
|
||||
190,1,,1.00,,1,,1
|
||||
191,0,,1.00,,1,,0.666666667
|
||||
192,0.5,,0.00,,1,,0.5
|
||||
193,1,,1.00,,1,,1
|
||||
194,1,,0.00,,0.5,,0.5
|
||||
195,0,,0.00,,1,,0.333333333
|
||||
196,1,,0.00,,0.5,,0.5
|
||||
197,0,,0.00,,1,,0.333333333
|
||||
198,1,,0.00,,1,,0.666666667
|
||||
199,0,,0.00,,1,,0.333333333
|
||||
200,0,,0.50,,1,,0.5
|
||||
201,0,,0.00,,0,,0
|
||||
202,1,,1.00,,1,,1
|
||||
203,0,,0.00,,1,,0.333333333
|
||||
204,0.5,,0.00,,1,,0.5
|
||||
205,1,,1.00,,1,,1
|
||||
206,1,,1.00,,1,,1
|
||||
207,1,,1.00,,1,,1
|
||||
208,1,,1.00,,1,,1
|
||||
209,1,,1.00,,1,,1
|
||||
210,1,,1.00,,1,,1
|
||||
211,1,,1.00,,1,,1
|
||||
212,1,,1.00,,1,,1
|
||||
213,0.5,,0.00,,0,,0.166666667
|
||||
214,0.5,"error, can't access product review",0.00,,1,,0.5
|
||||
215,0.5,,0.00,,1,,0.5
|
||||
216,0.5,,0.00,,0,,0.166666667
|
||||
217,0,,0.00,,1,,0.333333333
|
||||
218,0.5,,0.00,,0.5,,0.333333333
|
||||
219,0,,1.00,,0,,0.333333333
|
||||
220,0,,0.00,,0,,0
|
||||
221,0,,0.00,,0,,0
|
||||
222,0,,0.00,,0,,0
|
||||
223,0,,0.00,,0,,0
|
||||
224,0,,0.00,,0,,0
|
||||
225,0.5,,0.00,,1,,0.5
|
||||
226,0,,0.00,,1,,0.333333333
|
||||
227,1,,1.00,,1,,1
|
||||
228,0.5,,1.00,,1,,0.833333333
|
||||
229,1,,1.00,,1,,1
|
||||
230,0.5,,0.50,,1,,0.666666667
|
||||
231,1,,1.00,,1,,1
|
||||
232,1,,1.00,,1,,1
|
||||
233,1,,1.00,,1,,1
|
||||
234,1,,1.00,,1,,1
|
||||
235,0,,1.00,,1,,0.666666667
|
||||
236,1,,1.00,,1,,1
|
||||
237,1,,0.00,,1,,0.666666667
|
||||
238,0.5,,0.50,searching from lowest price,0.5,,0.5
|
||||
239,0.5,,0.50,,0.5,,0.5
|
||||
240,0,,0.00,,0,,0
|
||||
241,1,,0.00,,0.5,,0.5
|
||||
242,0,,0.00,,1,,0.333333333
|
||||
243,0,,0.00,,0,,0
|
||||
244,1,,1.00,,1,,1
|
||||
245,0,,1.00,,1,,0.666666667
|
||||
246,0,,0.00,,1,,0.333333333
|
||||
247,0.5,,0.00,,1,,0.5
|
||||
248,1,,1.00,,1,,1
|
||||
249,0,,0.00,,0,,0
|
||||
250,1,,1.00,,1,,1
|
||||
251,1,,1.00,,1,,1
|
||||
252,1,,1.00,,1,,1
|
||||
253,0,website does not provide the information,0.50,,0.5,,0.333333333
|
||||
254,1,,1.00,,1,,1
|
||||
255,1,,1.00,,1,,1
|
||||
256,1,,1.00,,1,,1
|
||||
257,1,,1.00,,1,,1
|
||||
258,1,,1.00,,1,,1
|
||||
259,1,,1.00,,1,,1
|
||||
260,0.5,,1.00,,1,,0.833333333
|
||||
261,0,,1.00,,1,,0.666666667
|
||||
262,0.5,,1.00,,1,,0.833333333
|
||||
263,1,,1.00,,1,,1
|
||||
264,1,,1.00,,1,,1
|
||||
265,0,,0.00,,0,,0
|
||||
266,0,,0.00,,0,,0
|
||||
267,0,,0.00,,0,,0
|
||||
268,1,,0.00,,1,,0.666666667
|
||||
269,1,,0.00,,0.5,,0.5
|
||||
270,1,,0.50,didn't show all,0,,0.5
|
||||
271,0.5,,0.50,,1,,0.666666667
|
||||
272,0.5,,0.50,,0.5,,0.5
|
||||
273,0.5,,0.50,,1,,0.666666667
|
||||
274,1,,0.00,,1,,0.666666667
|
||||
275,1,,1.00,,1,,1
|
||||
276,1,,1.00,,1,,1
|
||||
277,1,,0.50,,1,,0.833333333
|
||||
278,1,,0.50,,1,,0.833333333
|
||||
279,1,,0.50,,1,,0.833333333
|
||||
280,1,,0.50,,1,,0.833333333
|
||||
281,0.5,,0.50,,1,,0.666666667
|
||||
282,0.5,,0.50,,1,,0.666666667
|
||||
283,1,,0.00,,1,,0.666666667
|
||||
284,0.5,,1.00,didn't sort by price,1,,0.833333333
|
||||
285,0.5,,1.00,,1,,0.833333333
|
||||
286,0,,1.00,,1,,0.666666667
|
||||
287,1,,1.00,,1,,1
|
||||
288,0,,0.00,,0,,0
|
||||
289,0,,0.00,,0,,0
|
||||
290,0,,0.00,,0,,0
|
||||
291,0,,0.00,,0,,0
|
||||
292,0,,0.00,,0,,0
|
||||
293,1,,1.00,,1,,1
|
||||
294,1,,1.00,,1,,1
|
||||
295,1,,1.00,,1,,1
|
||||
296,1,,1.00,,0.5,,0.833333333
|
||||
297,1,,0.00,,1,,0.666666667
|
||||
298,0.5,,1.00,,1,,0.833333333
|
||||
299,1,,1.00,,1,,1
|
||||
300,0,,1.00,,1,,0.666666667
|
||||
301,0,,0.00,,1,,0.333333333
|
||||
302,0,,0.00,,0.5,,0.166666667
|
||||
303,0.5,,1.00,,1,,0.833333333
|
||||
304,1,,1.00,,1,,1
|
||||
305,1,,1.00,,1,,1
|
||||
306,0,,0.00,,0.5,,0.166666667
|
||||
307,0,,0.00,,1,,0.333333333
|
||||
308,1,,1.00,,1,,1
|
||||
309,0,,0.00,similar task but unable to replicate the steps done in the last task,1,,0.333333333
|
||||
310,1,,1.00,,1,,1
|
||||
311,1,,1.00,,1,,1
|
||||
312,1,,1.00,,1,,1
|
||||
313,0,,0.00,no customer service number provided?,0.5,,0.166666667
|
||||
314,1,,1.00,,1,,1
|
||||
315,1,,1.00,,1,,1
|
||||
316,1,,1.00,,1,,1
|
||||
317,1,,1.00,,1,,1
|
||||
318,1,,1.00,,1,,1
|
||||
319,0.5,,1.00,,1,,0.833333333
|
||||
320,1,,0.50,didn't list all,1,,0.833333333
|
||||
321,0.5,,0.00,,0.5,,0.333333333
|
||||
322,1,,1.00,,1,,1
|
||||
323,0,,0.00,,1,,0.333333333
|
||||
324,1,,1.00,,1,,1
|
||||
325,1,,1.00,,1,,1
|
||||
326,1,,1.00,,1,,1
|
||||
327,1,,1.00,,1,,1
|
||||
328,0.5,,0.50,,0.5,,0.5
|
||||
329,1,,1.00,,1,,1
|
||||
330,0.5,,0.50,didnt count all,1,,0.666666667
|
||||
331,0.5,,1.00,,1,,0.833333333
|
||||
332,0,,1.00,,0,,0.333333333
|
||||
333,0.5,,0.00,,1,,0.5
|
||||
334,0.5,,0.50,,1,,0.666666667
|
||||
335,0.5,,0.50,,0,,0.333333333
|
||||
336,0.5,,0.50,,1,,0.666666667
|
||||
337,0.5,,0.50,,0.5,,0.5
|
||||
338,1,,0.50,,1,,0.833333333
|
||||
339,1,,1.00,,1,,1
|
||||
340,0.5,,1.00,,1,,0.833333333
|
||||
341,0,,1.00,,1,,0.666666667
|
||||
342,1,,0.00,,1,,0.666666667
|
||||
343,0,,0.00,,0,,0
|
||||
344,0,,0.00,,1,,0.333333333
|
||||
345,0,,0.00,,0,,0
|
||||
346,1,,0.00,,1,,0.666666667
|
||||
347,0,,0.00,,0,,0
|
||||
348,1,,0.00,,1,,0.666666667
|
||||
349,1,,1.00,,1,,1
|
||||
350,1,,1.00,,1,,1
|
||||
351,1,,1.00,,1,,1
|
||||
352,1,,1.00,,1,,1
|
||||
353,0.5,,0.50,,1,,0.666666667
|
||||
354,1,,1.00,,1,,1
|
||||
355,1,,0.00,,0,,0.333333333
|
||||
356,0.5,,0.50,,0.5,,0.5
|
||||
357,0.5,,1.00,,0,,0.5
|
||||
358,1,,1.00,,1,,1
|
||||
359,1,,1.00,,1,,1
|
||||
360,1,,1.00,,1,,1
|
||||
361,0.5,,1.00,,1,,0.833333333
|
||||
362,1,,1.00,,0,,0.666666667
|
||||
363,1,,1.00,,1,,1
|
||||
364,1,,1.00,,1,,1
|
||||
365,1,,1.00,,1,,1
|
||||
366,0,,0.00,,0,,0
|
||||
367,0,,0.00,,1,,0.333333333
|
||||
368,0.5,,0.00,,1,,0.5
|
||||
369,1,,0.00,is there any discount?,1,,0.666666667
|
||||
370,1,,1.00,,1,,1
|
||||
371,1,,1.00,,1,,1
|
||||
372,1,,1.00,,1,,1
|
||||
373,0.5,,1.00,,1,,0.833333333
|
||||
374,1,q,0.50,correctly apply the theme but not able to preview,0.5,,0.666666667
|
||||
375,1,,0.50,,0.5,,0.666666667
|
||||
376,0,,1.00,,1,,0.666666667
|
||||
377,0,,0.00,,0,,0
|
||||
378,1,,0.00,,1,,0.666666667
|
||||
379,1,,1.00,,1,,1
|
||||
380,0,,0.00,,0,,0
|
||||
381,1,,1.00,,1,,1
|
||||
382,0,,0.00,,0,,0
|
||||
383,1,,1.00,,1,,1
|
||||
384,1,,1.00,,1,,1
|
||||
385,1,,1.00,,1,,1
|
||||
386,1,,1.00,,1,,1
|
||||
387,1,,1.00,,1,,1
|
||||
388,1,,1.00,,1,,1
|
||||
389,1,,0.50,,0.5,,0.666666667
|
||||
390,1,,0.50,,0.5,,0.666666667
|
||||
391,1,,0.50,,1,,0.833333333
|
||||
392,1,,0.50,,1,,0.833333333
|
||||
393,1,,0.50,,0.5,,0.666666667
|
||||
394,0,,1.00,,0,,0.333333333
|
||||
395,1,,1.00,,1,,1
|
||||
396,1,,1.00,,1,,1
|
||||
397,1,,1.00,,1,,1
|
||||
398,0,,1.00,,1,,0.666666667
|
||||
399,1,,1.00,,1,,1
|
||||
400,1,,1.00,,1,,1
|
||||
401,0,,0.00,similar task but failed,0,,0
|
||||
402,1,,1.00,,1,,1
|
||||
403,1,,1.00,,1,,1
|
||||
404,0,,0.00,didn't change the order to make sure the post is newest,1,,0.333333333
|
||||
405,0,,0.00,,1,,0.333333333
|
||||
406,0,,0.00,,1,,0.333333333
|
||||
407,0.5,,0.00,,1,,0.5
|
||||
408,0.5,,0.00,,1,,0.5
|
||||
409,1,,1.00,,1,,1
|
||||
410,1,,1.00,,1,,1
|
||||
411,0,,0.00,successfully navigated to the right LICENSE but not able to upload the file,0,,0
|
||||
412,0,,0.00,,0,,0
|
||||
413,0,,0.00,,0,,0
|
||||
414,1,,0.00,,1,,0.666666667
|
||||
415,0.5,"completed, but self-evaluation failed",0.50,did perform @ but the agent itself cannot detect whether the task is completed or not,0.5,,0.5
|
||||
416,0.5,"completed, but self-evaluation failed",0.00,-,0.5,,0.333333333
|
||||
417,0,,0.00,,0,,0
|
||||
418,1,,1.00,,1,,1
|
||||
419,1,,1.00,,1,,1
|
||||
420,1,,1.00,,1,,1
|
||||
421,1,,1.00,,1,,1
|
||||
422,1,,1.00,,1,,1
|
||||
423,0,,0.00,,0,,0
|
||||
424,1,,1.00,,1,,1
|
||||
425,0,website does not provide the information,0.00,,0,,0
|
||||
426,1,,0.00,,1,,0.666666667
|
||||
427,0,website does not provide the information,0.00,,1,,0.333333333
|
||||
428,0,start url error,0.00,,1,,0.333333333
|
||||
429,1,,1.00,,1,,1
|
||||
430,0,,0.00,,0,,0
|
||||
431,0,,0.00,,0,,0
|
||||
432,0,,1.00,,1,,0.666666667
|
||||
433,0,,1.00,,1,,0.666666667
|
||||
434,0.5,Click on the tablist failed,0.00,,1,,0.5
|
||||
435,0,,0.00,,1,,0.333333333
|
||||
436,0,,1.00,,1,,0.666666667
|
||||
437,0.5,shopping cart,1.00,,1,,0.833333333
|
||||
438,0,,0.00,,0,,0
|
||||
439,0,,0.00,,0.5,,0.166666667
|
||||
440,0,,0.00,,1,,0.333333333
|
||||
441,0,,1.00,,1,,0.666666667
|
||||
442,1,,0.00,,0,,0.333333333
|
||||
443,0,,1.00,,1,,0.666666667
|
||||
444,0,,1.00,,1,,0.666666667
|
||||
445,0,,1.00,,1,,0.666666667
|
||||
446,0.5,correct issue,0.00,,0.5,,0.333333333
|
||||
447,0.5,correct issue,0.00,,0,,0.166666667
|
||||
448,1,,1.00,,1,,1
|
||||
449,1,,1.00,,1,,1
|
||||
450,1,,1.00,,1,,1
|
||||
451,1,,1.00,,0.5,,0.833333333
|
||||
452,1,,1.00,,1,,1
|
||||
453,1,,1.00,,1,,1
|
||||
454,1,,1.00,,1,,1
|
||||
455,1,,1.00,,1,,1
|
||||
456,1,parachute?,1.00,,1,,1
|
||||
457,0,twice,1.00,,1,,0.666666667
|
||||
458,1,,1.00,,1,,1
|
||||
459,1,,1.00,,1,,1
|
||||
460,1,,1.00,,1,,1
|
||||
461,1,,1.00,,1,,1
|
||||
462,1,,1.00,,1,,1
|
||||
463,1,,1.00,,1,,1
|
||||
464,0,,0.00,,0,,0
|
||||
465,1,,1.00,,1,,1
|
||||
466,1,,1.00,,1,,1
|
||||
467,1,,1.00,,1,,1
|
||||
468,1,,1.00,,1,,1
|
||||
469,1,,1.00,,1,,1
|
||||
470,0,,0.00,,0.5,,0.166666667
|
||||
471,0,,0.00,,0,,0
|
||||
472,1,,1.00,,1,,1
|
||||
473,1,,1.00,,1,,1
|
||||
474,1,,1.00,,1,,1
|
||||
475,1,,1.00,,1,,1
|
||||
476,1,,1.00,,1,,1
|
||||
477,1,,1.00,,1,,1
|
||||
478,1,,1.00,,1,,1
|
||||
479,1,,1.00,,0.5,,0.833333333
|
||||
480,0,,0.00,,0,,0
|
||||
481,0,,0.00,,0,,0
|
||||
482,0,,0.00,,0,,0
|
||||
483,0,,0.00,,0,,0
|
||||
484,0,,0.00,,0,,0
|
||||
485,0,,0.00,,0,,0
|
||||
486,1,,1.00,,1,,1
|
||||
487,1,,1.00,,1,,1
|
||||
488,1,,1.00,,1,,1
|
||||
489,1,,1.00,,1,,1
|
||||
490,1,,1.00,,1,,1
|
||||
491,0.5,sent email but not message,1.00,,1,,0.833333333
|
||||
492,0.5,repeated process,0.00,,0.5,,0.333333333
|
||||
493,0.5,sent email but not message,1.00,,1,,0.833333333
|
||||
494,0.5,sent email but not message,0.00,,0.5,,0.333333333
|
||||
495,1,,1.00,,1,,1
|
||||
496,1,,1.00,,1,,1
|
||||
497,1,,1.00,,0.5,,0.833333333
|
||||
498,0.5,,0.00,,0,,0.166666667
|
||||
499,1,,1.00,,1,,1
|
||||
500,1,q,1.00,,1,,1
|
||||
501,0.5,,1.00,,1,,0.833333333
|
||||
502,0,,0.00,,1,,0.333333333
|
||||
503,0.5,,1.00,,0.5,,0.666666667
|
||||
504,0.5,,1.00,,0.5,,0.666666667
|
||||
505,0,,1.00,,1,,0.666666667
|
||||
506,0.5,,0.00,,1,,0.5
|
||||
507,0.5,,0.00,,0,,0.166666667
|
||||
508,1,,0.00,,1,,0.666666667
|
||||
509,1,,0.00,,0.5,,0.5
|
||||
510,0.5,,0.00,,0.5,,0.333333333
|
||||
511,1,,0.00,didnt choose rating as order,1,,0.666666667
|
||||
512,1,,1.00,,1,,1
|
||||
513,1,,1.00,,1,,1
|
||||
514,1,,1.00,,1,,1
|
||||
515,1,,1.00,,1,,1
|
||||
516,1,,1.00,,1,,1
|
||||
517,1,,1.00,,1,,1
|
||||
518,1,,1.00,,1,,1
|
||||
519,1,,1.00,,1,,1
|
||||
520,1,,1.00,,1,,1
|
||||
521,1,,1.00,,1,,1
|
||||
522,0,,0.00,,1,,0.333333333
|
||||
523,0,,0.00,,0.5,,0.166666667
|
||||
524,0.5,,0.00,,1,,0.5
|
||||
525,0,,0.00,,1,,0.333333333
|
||||
526,0.5,,0.00,,1,,0.5
|
||||
527,1,,0.00,,1,,0.666666667
|
||||
528,1,id,1.00,,1,,1
|
||||
529,1,,1.00,,1,,1
|
||||
530,1,,1.00,,1,,1
|
||||
531,1,,1.00,,1,,1
|
||||
532,1,,1.00,,1,,1
|
||||
533,1,,1.00,,1,,1
|
||||
534,1,,1.00,,1,,1
|
||||
535,1,,1.00,,1,,1
|
||||
536,1,,1.00,,1,,1
|
||||
537,1,,1.00,,1,,1
|
||||
538,1,,0.00,,1,,0.666666667
|
||||
539,1,,1.00,,1,,1
|
||||
540,1,,1.00,,1,,1
|
||||
541,1,,1.00,,1,,1
|
||||
542,1,,1.00,,1,,1
|
||||
543,0,,0.00,,0.5,,0.166666667
|
||||
544,0.5,,0.00,,0.5,,0.333333333
|
||||
545,0,,0.00,,0.5,,0.166666667
|
||||
546,0,,0.00,,0.5,,0.166666667
|
||||
547,0.5,,0.00,,1,,0.5
|
||||
548,0.5,"L, XL XS",1.00,,1,,0.833333333
|
||||
549,0.5,,0.00,,0.5,,0.333333333
|
||||
550,0.5,,0.00,,0.5,,0.333333333
|
||||
551,1,,1.00,,1,,1
|
||||
552,0.5,1 url ?,1.00,,1,,0.833333333
|
||||
553,0.5,,0.00,,1,,0.5
|
||||
554,0,,0.00,,1,,0.333333333
|
||||
555,0,,0.00,,1,,0.333333333
|
||||
556,0,,0.00,,0,,0
|
||||
557,0,,0.00,,0,,0
|
||||
558,0.5,,0.00,,0,,0.166666667
|
||||
559,0,,0.00,,0,,0
|
||||
560,0,,0.00,project url,0,,0
|
||||
561,0,,0.00,,0.5,,0.166666667
|
||||
562,0,,0.00,,0,,0
|
||||
563,0,,0.00,,0.5,,0.166666667
|
||||
564,0.5,,0.50,create the repo but not able to edit README,0,,0.333333333
|
||||
565,0,,0.00,,0,,0
|
||||
566,0,,0.00,,0,,0
|
||||
567,0,,0.00,,0,,0
|
||||
568,0,,0.00,,0,,0
|
||||
569,0,,0.00,,0.5,,0.166666667
|
||||
570,0,,0.00,,0.5,,0.166666667
|
||||
571,1,,1.00,,1,,1
|
||||
572,1,,1.00,,1,,1
|
||||
573,1,,1.00,,1,,1
|
||||
574,1,,1.00,,1,,1
|
||||
575,1,,1.00,,1,,1
|
||||
576,0,,0.00,,0,,0
|
||||
577,0,,0.00,,0,,0
|
||||
578,0,,0.00,,0,,0
|
||||
579,0,,0.00,,0,,0
|
||||
580,1,,1.00,,1,,1
|
||||
581,1,,1.00,,1,,1
|
||||
582,1,,1.00,,1,,1
|
||||
583,1,,1.00,,1,,1
|
||||
584,1,,1.00,,1,,1
|
||||
585,0,,0.00,,0,,0
|
||||
586,0,,0.00,search order one by one,0,,0
|
||||
587,0,,0.00,,0,,0
|
||||
588,0,,0.00,,0,,0
|
||||
589,0,,0.00,,0,,0
|
||||
590,1,,1.00,,0.5,,0.833333333
|
||||
591,1,,1.00,,0.5,,0.833333333
|
||||
592,1,,0.00,not able to set start date,0.5,,0.5
|
||||
593,0,,0.00,,0.5,,0.166666667
|
||||
594,1,minor error,0.00,not able to set due date,0.5,,0.5
|
||||
595,0.5,,0.50,subscribed to the forum but didn't successfully open the thread because it kept opening the picture so it didn't subscribe to the thread,1,?,0.666666667
|
||||
596,1,,0.50,,1,?,0.833333333
|
||||
597,1,,0.00,,1,?,0.666666667
|
||||
598,1,,0.00,,1,?,0.666666667
|
||||
599,1,,1.00,,1,?,1
|
||||
600,1,,1.00,,1,,1
|
||||
601,1,,1.00,,1,,1
|
||||
602,1,,1.00,,1,,1
|
||||
603,1,,1.00,,1,,1
|
||||
604,1,,1.00,,1,,1
|
||||
605,1,,1.00,,1,,1
|
||||
606,1, ,1.00,,1,,1
|
||||
607,1,,1.00,,1,,1
|
||||
608,1,,1.00,,1,,1
|
||||
609,1,,1.00,,1,,1
|
||||
610,1,,1.00,,1,,1
|
||||
611,1,,1.00,,1,,1
|
||||
612,1,,1.00,,1,,1
|
||||
613,1,,1.00,,1,,1
|
||||
614,1,,1.00,,1,,1
|
||||
615,0,,0.00,,0,,0
|
||||
616,0,,0.50,,0,,0.166666667
|
||||
617,0,,0.00,,0,,0
|
||||
618,0,,0.00,,0,,0
|
||||
619,0,,0.00,,0,,0
|
||||
620,1,,1.00,,1,,1
|
||||
621,1,,1.00,,1,,1
|
||||
622,1,,1.00,,1,,1
|
||||
623,1,,1.00,,1,,1
|
||||
624,1,,1.00,,1,,1
|
||||
625,1,,1.00,,1,,1
|
||||
626,1,,1.00,,1,,1
|
||||
627,0,,0.00,not able to enter title,0,,0
|
||||
628,1,,1.00,,1,,1
|
||||
629,1,,1.00,,1,,1
|
||||
630,1,,1.00,,1,,1
|
||||
631,1,,1.00,,1,,1
|
||||
632,1,,1.00,,1,,1
|
||||
633,1,,1.00,,1,,1
|
||||
634,1,,1.00,,1,,1
|
||||
635,1,,1.00,,1,,1
|
||||
636,1,,1.00,,1,,1
|
||||
637,1,,1.00,,1,,1
|
||||
638,1,,1.00,,1,,1
|
||||
639,1,,1.00,,1,,1
|
||||
640,1,,1.00,,1,,1
|
||||
641,1,,1.00,,1,,1
|
||||
642,1,,1.00,,1,,1
|
||||
643,1,,1.00,,1,,1
|
||||
644,1,,1.00,,1,,1
|
||||
645,1,,1.00,,1,,1
|
||||
646,1,,1.00,,1,,1
|
||||
647,1,,1.00,,1,,1
|
||||
648,1,,1.00,,1,,1
|
||||
649,1,,1.00,,1,,1
|
||||
650,1,,1.00,,1,,1
|
||||
651,1,,1.00,,1,,1
|
||||
652,1,,1.00,,1,,1
|
||||
653,1,,1.00,,1,,1
|
||||
654,1,,1.00,,1,,1
|
||||
655,1,,0.50,comment is weird,1,,0.833333333
|
||||
656,1,,0.50,,1,,0.833333333
|
||||
657,1,,0.50,,1,,0.833333333
|
||||
658,0.5,,0.50,not able to scroll to 2030,0.5,,0.5
|
||||
659,0.5,,0.50,,1,,0.666666667
|
||||
660,0.5,,0.50,not able to assign to the person,1,,0.666666667
|
||||
661,1,,1.00,,1,,1
|
||||
662,1,,1.00,,1,,1
|
||||
663,1,,1.00,,1,,1
|
||||
664,1,,1.00,,1,,1
|
||||
665,1,,1.00,,1,,1
|
||||
666,0.5,,0.00,,1,,0.5
|
||||
667,1,,1.00,,1,,1
|
||||
668,0.5,,0.00,,1,,0.5
|
||||
669,1,,1.00,,1,,1
|
||||
670,1,,1.00,,1,,1
|
||||
671,0.5,,0.00,didn't post in reddit,1,,0.5
|
||||
672,0.5,,0.00,,1,,0.5
|
||||
673,0.5,,0.00,,1,,0.5
|
||||
674,0.5,,0.00,,1,,0.5
|
||||
675,0.5,,0.00,,1,,0.5
|
||||
676,1,?,1.00,,1,,1
|
||||
677,1,,1.00,,1,,1
|
||||
678,1,,1.00,,1,,1
|
||||
679,1,,1.00,,1,,1
|
||||
680,1,,1.00,,1,,1
|
||||
681,0,,0.00,,0.5,,0.166666667
|
||||
682,0,,0.00,,0.5,,0.166666667
|
||||
683,0,,0.00,,0,,0
|
||||
684,0,,0.00,,0,,0
|
||||
685,0,,0.00,not able to relate the GitLab repo to Reddit,1,,0.333333333
|
||||
686,0,,0.00,,1,,0.333333333
|
||||
687,0,,0.00,,1,,0.333333333
|
||||
688,0,,0.00,,1,,0.333333333
|
||||
689,1,,1.00,,1,,1
|
||||
690,1,,1.00,,1,,1
|
||||
691,1,,1.00,,1,,1
|
||||
692,1,,1.00,,1,,1
|
||||
693,1,,1.00,,1,,1
|
||||
694,0.5,size,1.00,,1,,0.833333333
|
||||
695,0.5,,1.00,,1,,0.833333333
|
||||
696,0.5,,1.00,,0.5,,0.666666667
|
||||
697,0.5,,1.00,,0.5,,0.666666667
|
||||
698,0.5,,1.00,,1,,0.833333333
|
||||
699,0.5,website ,0.00,,0,,0.166666667
|
||||
700,0.5,,0.00,not able to stop within limited steps,0.5,,0.333333333
|
||||
701,0.5,,1.00,,1,,0.833333333
|
||||
702,0.5,,0.00,,0,,0.166666667
|
||||
703,0,,0.50,not able to update ,0.5,,0.333333333
|
||||
704,1,,0.50,,0.5,,0.666666667
|
||||
705,1,,1.00,,1,,1
|
||||
706,1,,0.00,,1,,0.666666667
|
||||
707,1,,1.00,,1,,1
|
||||
708,0.5,,0.00,,1,,0.5
|
||||
709,1,successfully shows the results,0.00,,0.5,,0.5
|
||||
710,1,,0.50,,0.5,,0.666666667
|
||||
711,1,looks right,0.00,,0.5,,0.5
|
||||
712,1,,0.00,,1,,0.666666667
|
||||
713,1,,1.00,,1,,1
|
||||
714,1,,1.00,,1,,1
|
||||
715,0,,1.00,,0,,0.333333333
|
||||
716,0.5,,1.00,,1,,0.833333333
|
||||
717,0.5,should be r/movie,1.00,,1,,0.833333333
|
||||
718,0.5,,0.00,,0.5,,0.333333333
|
||||
719,0.5,,0.00,,1,,0.5
|
||||
720,1,,0.00,,0.5,,0.5
|
||||
721,1,,1.00,,0.5,,0.833333333
|
||||
722,0.5,,1.00,,1,,0.833333333
|
||||
723,0,,1.00,,0,,0.333333333
|
||||
724,1,,0.00,,0,,0.333333333
|
||||
725,1,,0.00,,0,,0.333333333
|
||||
726,1,,0.00,,0,,0.333333333
|
||||
727,1,,0.00,,0.5,,0.5
|
||||
728,0,,0.00,,0,,0
|
||||
729,1,,0.00,,0,,0.333333333
|
||||
730,0.5,,0.00,,0,,0.166666667
|
||||
731,0,,1.00,,1,,0.666666667
|
||||
732,0,,1.00,,1,,0.666666667
|
||||
733,0,,1.00,,1,,0.666666667
|
||||
734,0,,1.00,,1,,0.666666667
|
||||
735,0,,1.00,,1,,0.666666667
|
||||
736,1,,1.00,,1,,1
|
||||
737,1,,1.00,,1,,1
|
||||
738,1,,1.00,,1,,1
|
||||
739,1,,1.00,,1,,1
|
||||
740,1,,1.00,,1,,1
|
||||
741,1,,1.00,,0,,0.666666667
|
||||
742,0.5,,0.00,,0,,0.166666667
|
||||
743,0.5,,0.00,,0,,0.166666667
|
||||
744,0.5,case sensitive,0.00,,0.5,,0.333333333
|
||||
745,0.5,,0.00,,0,,0.166666667
|
||||
746,0.5,,0.00,,0,,0.166666667
|
||||
747,0,,0.00,,0,,0
|
||||
748,0.5,,0.00,,0,,0.166666667
|
||||
749,0,,0.00,,0,,0
|
||||
750,0,,0.00,,0,,0
|
||||
751,0,,0.00,,0,,0
|
||||
752,0.5,,0.00,,0,,0.166666667
|
||||
753,0,,0.00,,1,,0.333333333
|
||||
754,0,,0.00,,1,,0.333333333
|
||||
755,0,,1.00,,1,,0.666666667
|
||||
756,0.5,,1.00,,1,,0.833333333
|
||||
757,1,,1.00,,1,,1
|
||||
758,1,,1.00,,1,,1
|
||||
759,1,No enough information,1.00,,1,,1
|
||||
760,1,,0.00,,0,,0.333333333
|
||||
761,0,q,0.00,,0,,0
|
||||
762,1,,0.00,,1,,0.666666667
|
||||
763,1,,0.00,,1,,0.666666667
|
||||
764,0,q,0.00,,0,,0
|
||||
765,0,,0.00,,0,,0
|
||||
766,0,,0.00,,0,,0
|
||||
767,0,,0.00,,0,,0
|
||||
768,0.5,,0.00,,1,,0.5
|
||||
769,0.5,,0.00,didn't add on top of the original quantity,0,,0.166666667
|
||||
770,1,,1.00,,1,,1
|
||||
771,1,,1.00,,1,,1
|
||||
772,0,,1.00,,1,,0.666666667
|
||||
773,0,,0.00,,1,,0.333333333
|
||||
774,0,,0.00,,0.5,,0.166666667
|
||||
775,0,,0.00,,0,,0
|
||||
776,0,,0.00,,0,,0
|
||||
777,0.5,,0.00,,0,,0.166666667
|
||||
778,1,,1.00,,1,,1
|
||||
779,0,,1.00,,1,,0.666666667
|
||||
780,1,,1.00,,1,,1
|
||||
781,0,,0.00,,0,,0
|
||||
782,0,,1.00,,1,,0.666666667
|
||||
783,0,,0.00,,0,,0
|
||||
784,1,,1.00,,1,,1
|
||||
785,1,,1.00,,1,,1
|
||||
786,1,,1.00,,1,,1
|
||||
787,1,,1.00,,1,,1
|
||||
788,1,,1.00,,1,,1
|
||||
789,0,,0.00,,0.5,,0.166666667
|
||||
790,0,,1.00,,0.5,,0.5
|
||||
791,0,,1.00,,1,,0.666666667
|
||||
792,0,,0.00,,1,,0.333333333
|
||||
793,0,,1.00,,1,,0.666666667
|
||||
794,0,,0.00,,0,,0
|
||||
795,1,,1.00,,0.5,,0.833333333
|
||||
796,1,,1.00,,0.5,,0.833333333
|
||||
797,1,,1.00,,0.5,,0.833333333
|
||||
798,0,,0.00,,0.5,,0.166666667
|
||||
799,0.5,,0.00,,0,,0.166666667
|
||||
800,0.5,,0.00,,0,,0.166666667
|
||||
801,0.5,,0.00,,0.5,,0.333333333
|
||||
802,0.5,,0.00,,0,,0.166666667
|
||||
803,0.5,,0.50,,0,,0.333333333
|
||||
804,0.5,,0.00,,0.5,,0.333333333
|
||||
805,0.5,,0.00,,1,,0.5
|
||||
806,1,,1.00,,1,,1
|
||||
807,1,,0.00,,0,,0.333333333
|
||||
808,1,,1.00,,1,,1
|
||||
809,1,,1.00,,1,,1
|
||||
810,0,,1.00,,1,,0.666666667
|
||||
811,1,,1.00,,0,,0.666666667
|
||||
Win Rate,0.619458128,,0.591748768,,0.740763547,,0.650656814
|
|
BIN
files/human_assessment/WebVoyager-AgentOccam-Alice.xlsx
Normal file
BIN
files/human_assessment/WebVoyager-AgentOccam-Alice.xlsx
Normal file
Binary file not shown.
BIN
files/human_assessment/WebVoyager-AgentOccam-Bob.xlsx
Normal file
BIN
files/human_assessment/WebVoyager-AgentOccam-Bob.xlsx
Normal file
Binary file not shown.
BIN
files/human_assessment/WebVoyager-AgentOccam-Colin.xlsx
Normal file
BIN
files/human_assessment/WebVoyager-AgentOccam-Colin.xlsx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user