Changeset 90419 in webkit
- Timestamp:
- Jul 5, 2011 5:12:06 PM (13 years ago)
- Location:
- trunk/Tools
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Tools/ChangeLog
r90418 r90419 5 5 6 6 * TestResultServer/handlers/testfilehandler.py: 7 8 2011-07-05 Dirk Pranke <dpranke@chromium.org> 9 10 nrwt: allow for multiple http shards 11 https://bugs.webkit.org/show_bug.cgi?id=63116 12 13 Reviewed by Tony Chang. 14 15 This modifies the sharding logic to support multiple http 16 shards, but for now we clamp to one shard until we can test 17 perf impact and flakiness impact. 18 19 * Scripts/webkitpy/layout_tests/layout_package/manager.py: 20 * Scripts/webkitpy/layout_tests/layout_package/manager_unittest.py: 7 21 8 22 2011-07-05 Dirk Pranke <dpranke@chromium.org> -
trunk/Tools/Scripts/webkitpy/layout_tests/layout_package/manager.py
r90417 r90419 238 238 239 239 240 class TestShard(object): 241 """A test shard is a named list of TestInputs.""" 242 243 # FIXME: Make this class visible, used by workers as well. 244 def __init__(self, name, test_inputs): 245 self.name = name 246 self.test_inputs = test_inputs 247 248 def __repr__(self): 249 return "TestShard(name='%s', test_inputs=%s'" % (self.name, self.test_inputs) 250 251 def __eq__(self, other): 252 return self.name == other.name and self.test_inputs == other.test_inputs 253 254 240 255 class Manager(object): 241 256 """A class for managing running a series of tests on a series of layout … … 536 551 test_expectations.SLOW) 537 552 538 def _shard_tests(self, test_files, use_real_shards):553 def _shard_tests(self, test_files, num_workers, fully_parallel): 539 554 """Groups tests into batches. 540 555 This helps ensure that tests that depend on each other (aka bad tests!) 541 556 continue to run together as most cross-tests dependencies tend to 542 occur within the same directory. If use_real_shards is False, we 543 put each (non-HTTP/websocket) test into its own shard for maximum 544 concurrency instead of trying to do any sort of real sharding. 545 557 occur within the same directory. 546 558 Return: 547 Two lists of lists of TestInput objects. The first list should 548 only be run under the server lock, the second can be run whenever. 549 """ 550 # FIXME: We still need to support multiple locked shards. 559 Two list of TestShards. The first contains tests that must only be 560 run under the server lock, the second can be run whenever. 561 """ 562 563 # FIXME: Move all of the sharding logic out of manager into its 564 # own class or module. Consider grouping it with the chunking logic 565 # in prepare_lists as well. 
566 if num_workers == 1: 567 return self._shard_in_two(test_files) 568 elif fully_parallel: 569 return self._shard_every_file(test_files) 570 return self._shard_by_directory(test_files, num_workers) 571 572 def _shard_in_two(self, test_files): 573 """Returns two lists of shards, one with all the tests requiring a lock and one with the rest. 574 575 This is used when there's only one worker, to minimize the per-shard overhead.""" 576 locked_inputs = [] 577 unlocked_inputs = [] 578 for test_file in test_files: 579 test_input = self._get_test_input_for_file(test_file) 580 if self._test_requires_lock(test_file): 581 locked_inputs.append(test_input) 582 else: 583 unlocked_inputs.append(test_input) 584 return [TestShard('locked_tests', locked_inputs)], [TestShard('unlocked_tests', unlocked_inputs)] 585 586 def _shard_every_file(self, test_files): 587 """Returns two lists of shards, each shard containing a single test file. 588 589 This mode gets maximal parallelism at the cost of much higher flakiness.""" 551 590 locked_shards = [] 552 591 unlocked_shards = [] 553 tests_to_http_lock = [] 554 if not use_real_shards: 555 for test_file in test_files: 556 test_input = self._get_test_input_for_file(test_file) 557 if self._test_requires_lock(test_file): 558 tests_to_http_lock.append(test_input) 559 else: 560 unlocked_shards.append((".", [test_input])) 561 else: 562 tests_by_dir = {} 563 for test_file in test_files: 564 directory = self._get_dir_for_test_file(test_file) 565 test_input = self._get_test_input_for_file(test_file) 566 if self._test_requires_lock(test_file): 567 tests_to_http_lock.append(test_input) 568 else: 569 tests_by_dir.setdefault(directory, []) 570 tests_by_dir[directory].append(test_input) 571 for directory in tests_by_dir: 572 test_list = tests_by_dir[directory] 573 test_list_tuple = (directory, test_list) 574 unlocked_shards.append(test_list_tuple) 575 576 # Sort the shards by directory name. 
577 unlocked_shards.sort(lambda a, b: cmp(a[0], b[0])) 578 579 if tests_to_http_lock: 580 locked_shards = [("tests_to_http_lock", tests_to_http_lock)] 581 582 return (locked_shards, unlocked_shards) 592 for test_file in test_files: 593 test_input = self._get_test_input_for_file(test_file) 594 595 # Note that we use a '.' for the shard name; the name doesn't really 596 # matter, and the only other meaningful value would be the filename, 597 # which would be really redundant. 598 if self._test_requires_lock(test_file): 599 locked_shards.append(TestShard('.', [test_input])) 600 else: 601 unlocked_shards.append(TestShard('.', [test_input])) 602 603 return locked_shards, unlocked_shards 604 605 def _shard_by_directory(self, test_files, num_workers): 606 """Returns two lists of shards, each shard containing all the files in a directory. 607 608 This is the default mode, and gets as much parallelism as we can while 609 minimizing flakiness caused by inter-test dependencies.""" 610 locked_shards = [] 611 unlocked_shards = [] 612 tests_by_dir = {} 613 # FIXME: Given that the tests are already sorted by directory, 614 # we can probably rewrite this to be clearer and faster. 615 for test_file in test_files: 616 directory = self._get_dir_for_test_file(test_file) 617 test_input = self._get_test_input_for_file(test_file) 618 tests_by_dir.setdefault(directory, []) 619 tests_by_dir[directory].append(test_input) 620 621 for directory, test_inputs in tests_by_dir.iteritems(): 622 shard = TestShard(directory, test_inputs) 623 if self._test_requires_lock(directory): 624 locked_shards.append(shard) 625 else: 626 unlocked_shards.append(shard) 627 628 # Sort the shards by directory name. 
629 locked_shards.sort(key=lambda shard: shard.name) 630 unlocked_shards.sort(key=lambda shard: shard.name) 631 632 return (self._resize_shards(locked_shards, self._max_locked_shards(num_workers), 633 'locked_shard'), 634 unlocked_shards) 635 636 def _max_locked_shards(self, num_workers): 637 # Put a ceiling on the number of locked shards, so that we 638 # don't hammer the servers too badly. 639 640 # FIXME: For now, limit to one shard. After testing to make sure we 641 # can handle multiple shards, we should probably do something like 642 # limit this to no more than a quarter of all workers, e.g.: 643 # return max(math.ceil(num_workers / 4.0), 1) 644 return 1 645 646 def _resize_shards(self, old_shards, max_new_shards, shard_name_prefix): 647 """Takes a list of shards and redistributes the tests into no more 648 than |max_new_shards| new shards.""" 649 650 # This implementation assumes that each input shard only contains tests from a 651 # single directory, and that tests in each shard must remain together; as a 652 # result, a given input shard is never split between output shards. 653 # 654 # Each output shard contains the tests from one or more input shards and 655 # hence may contain tests from multiple directories. 
656 657 def divide_and_round_up(numerator, divisor): 658 return int(math.ceil(float(numerator) / divisor)) 659 660 def extract_and_flatten(shards): 661 test_inputs = [] 662 for shard in shards: 663 test_inputs.extend(shard.test_inputs) 664 return test_inputs 665 666 def split_at(seq, index): 667 return (seq[:index], seq[index:]) 668 669 num_old_per_new = divide_and_round_up(len(old_shards), max_new_shards) 670 new_shards = [] 671 remaining_shards = old_shards 672 while remaining_shards: 673 some_shards, remaining_shards = split_at(remaining_shards, num_old_per_new) 674 new_shards.append(TestShard('%s_%d' % (shard_name_prefix, len(new_shards) + 1), 675 extract_and_flatten(some_shards))) 676 return new_shards 583 677 584 678 def _contains_tests(self, subdir): … 588 682 return False 589 683 590 def _num_workers(self, num_shards): 591 num_workers = min(int(self._options.child_processes), num_shards) 684 def _log_num_workers(self, num_workers, num_shards, num_locked_shards): 592 685 driver_name = self._port.driver_name() 593 686 if num_workers == 1: … 595 688 (driver_name, grammar.pluralize('shard', num_shards))) 596 689 else: 597 self._printer.print_config("Running %d %ss in parallel over %d shards" % 598 (num_workers, driver_name, num_shards)) 599 return num_workers 690 self._printer.print_config("Running %d %ss in parallel over %d shards (%d locked)" % 691 (num_workers, driver_name, num_shards, num_locked_shards)) 600 692 601 693 def _run_tests(self, file_list, result_summary): … 626 718 self._printer.print_update('Sharding tests ...') 627 719 locked_shards, unlocked_shards = self._shard_tests(file_list, 628 int(self._options.child_processes) > 1 and not self._options.experimental_fully_parallel) 720 int(self._options.child_processes), self._options.experimental_fully_parallel) 629 721 630 722 # FIXME: We don't have a good way to coordinate the workers so that … 641 733 self.start_servers_with_lock() 642 734 643 num_workers = self._num_workers(len(all_shards)) 
735 num_workers = min(int(self._options.child_processes), len(all_shards)) 736 self._log_num_workers(num_workers, len(all_shards), len(locked_shards)) 737 644 738 manager_connection = manager_worker_broker.get(self._port, self._options, 645 739 self, worker.Worker) … … 664 758 self._printer.print_update("Starting testing ...") 665 759 for shard in all_shards: 666 manager_connection.post_message('test_list', shard[0], shard[1]) 760 # FIXME: Change 'test_list' to 'shard', make sharding public. 761 manager_connection.post_message('test_list', shard.name, shard.test_inputs) 667 762 668 763 # We post one 'stop' message for each worker. Because the stop message … … 1347 1442 def find(name, test_lists): 1348 1443 for i in range(len(test_lists)): 1349 if test_lists[i] [0]== name:1444 if test_lists[i].name == name: 1350 1445 return i 1351 1446 return -1 -
trunk/Tools/Scripts/webkitpy/layout_tests/layout_package/manager_unittest.py
r90417 r90419 40 40 from webkitpy import layout_tests 41 41 from webkitpy.layout_tests import run_webkit_tests 42 from webkitpy.layout_tests.layout_package.manager import Manager, natural_sort_key, path_key, TestRunInterruptedException 42 from webkitpy.layout_tests.layout_package.manager import Manager, natural_sort_key, path_key, TestRunInterruptedException, TestShard 43 43 from webkitpy.layout_tests.layout_package import printing 44 44 from webkitpy.layout_tests.layout_package.result_summary import ResultSummary … … 51 51 52 52 53 class ManagerTest(unittest.TestCase): 54 def test_shard_tests(self): 55 # Test that _shard_tests in test_runner.TestRunner really 56 # put the http tests first in the queue. 53 class ShardingTests(unittest.TestCase): 54 # FIXME: Remove "LayoutTests" from this if we can ever convert the generic 55 # code from filenames to test names. 56 test_list = [ 57 "LayoutTests/http/tests/websocket/tests/unicode.htm", 58 "LayoutTests/animations/keyframes.html", 59 "LayoutTests/http/tests/security/view-source-no-refresh.html", 60 "LayoutTests/http/tests/websocket/tests/websocket-protocol-ignored.html", 61 "LayoutTests/fast/css/display-none-inline-style-change-crash.html", 62 "LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html", 63 "LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html", 64 "LayoutTests/ietestcenter/Javascript/11.1.5_4-4-c-1.html", 65 "LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html", 66 ] 67 68 def get_shards(self, num_workers, fully_parallel): 57 69 port = Mock() 58 70 port._filesystem = filesystem_mock.MockFileSystem() 59 manager = ManagerWrapper(port=port, options=Mock(), printer=Mock()) 60 61 test_list = [ 62 "LayoutTests/websocket/tests/unicode.htm", 63 "LayoutTests/animations/keyframes.html", 64 "LayoutTests/http/tests/security/view-source-no-refresh.html", 65 "LayoutTests/websocket/tests/websocket-protocol-ignored.html", 66 "LayoutTests/fast/css/display-none-inline-style-change-crash.html", 67 
"LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html", 68 "LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html", 69 "LayoutTests/ietestcenter/Javascript/11.1.5_4-4-c-1.html", 70 "LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html", 71 ] 72 73 expected_tests_to_http_lock = set([ 74 'LayoutTests/websocket/tests/unicode.htm', 75 'LayoutTests/http/tests/security/view-source-no-refresh.html', 76 'LayoutTests/websocket/tests/websocket-protocol-ignored.html', 77 'LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html', 78 ]) 79 80 single_locked, single_unlocked = manager._shard_tests(test_list, False) 81 multi_locked, multi_unlocked = manager._shard_tests(test_list, True) 82 83 self.assertEqual("tests_to_http_lock", single_locked[0][0]) 84 self.assertEqual(expected_tests_to_http_lock, set(single_locked[0][1])) 85 self.assertEqual("tests_to_http_lock", multi_locked[0][0]) 86 self.assertEqual(expected_tests_to_http_lock, set(multi_locked[0][1])) 71 self.manager = ManagerWrapper(port=port, options=Mock(), printer=Mock()) 72 return self.manager._shard_tests(self.test_list, num_workers, fully_parallel) 73 74 def test_shard_by_dir(self): 75 locked, unlocked = self.get_shards(num_workers=2, fully_parallel=False) 76 77 # Note that although there are tests in multiple dirs that need locks, 78 # they are crammed into a single shard in order to reduce the # of 79 # workers hitting the server at once. 
80 self.assertEquals(locked, 81 [TestShard('locked_shard_1', 82 ['LayoutTests/http/tests/security/view-source-no-refresh.html', 83 'LayoutTests/http/tests/websocket/tests/unicode.htm', 84 'LayoutTests/http/tests/websocket/tests/websocket-protocol-ignored.html', 85 'LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html'])]) 86 self.assertEquals(unlocked, 87 [TestShard('animations', 88 ['LayoutTests/animations/keyframes.html']), 89 TestShard('dom/html/level2/html', 90 ['LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html', 91 'LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html']), 92 TestShard('fast/css', 93 ['LayoutTests/fast/css/display-none-inline-style-change-crash.html']), 94 TestShard('ietestcenter/Javascript', 95 ['LayoutTests/ietestcenter/Javascript/11.1.5_4-4-c-1.html'])]) 96 97 def test_shard_every_file(self): 98 locked, unlocked = self.get_shards(num_workers=2, fully_parallel=True) 99 self.assertEquals(locked, 100 [TestShard('.', ['LayoutTests/http/tests/websocket/tests/unicode.htm']), 101 TestShard('.', ['LayoutTests/http/tests/security/view-source-no-refresh.html']), 102 TestShard('.', ['LayoutTests/http/tests/websocket/tests/websocket-protocol-ignored.html']), 103 TestShard('.', ['LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html'])]) 104 self.assertEquals(unlocked, 105 [TestShard('.', ['LayoutTests/animations/keyframes.html']), 106 TestShard('.', ['LayoutTests/fast/css/display-none-inline-style-change-crash.html']), 107 TestShard('.', ['LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html']), 108 TestShard('.', ['LayoutTests/ietestcenter/Javascript/11.1.5_4-4-c-1.html']), 109 TestShard('.', ['LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html'])]) 110 111 def test_shard_in_two(self): 112 locked, unlocked = self.get_shards(num_workers=1, fully_parallel=False) 113 self.assertEquals(locked, 114 [TestShard('locked_tests', 115 ['LayoutTests/http/tests/websocket/tests/unicode.htm', 116 
'LayoutTests/http/tests/security/view-source-no-refresh.html', 117 'LayoutTests/http/tests/websocket/tests/websocket-protocol-ignored.html', 118 'LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html'])]) 119 self.assertEquals(unlocked, 120 [TestShard('unlocked_tests', 121 ['LayoutTests/animations/keyframes.html', 122 'LayoutTests/fast/css/display-none-inline-style-change-crash.html', 123 'LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html', 124 'LayoutTests/ietestcenter/Javascript/11.1.5_4-4-c-1.html', 125 'LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html'])]) 87 126 88 127 def test_http_locking(tester): … … 94 133 def handle_finished_list(self, source, list_name, num_tests, elapsed_time): 95 134 if not self._finished_list_called: 96 tester.assertEquals(list_name, ' tests_to_http_lock')135 tester.assertEquals(list_name, 'locked_tests') 97 136 tester.assertTrue(self._remaining_locked_shards) 98 137 tester.assertTrue(self._has_http_lock)
Note: See TracChangeset
for help on using the changeset viewer.