Changeset 90419 in webkit


Ignore:
Timestamp:
Jul 5, 2011 5:12:06 PM (13 years ago)
Author:
dpranke@chromium.org
Message:

2011-07-05 Dirk Pranke <dpranke@chromium.org>

nrwt: allow for multiple http shards
https://bugs.webkit.org/show_bug.cgi?id=63116

Reviewed by Tony Chang.

This modifies the sharding logic to support multiple http
shards, but for now we clamp to one shard until we can test
perf impact and flakiness impact.

  • Scripts/webkitpy/layout_tests/layout_package/manager.py:
  • Scripts/webkitpy/layout_tests/layout_package/manager_unittest.py:
Location:
trunk/Tools
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/Tools/ChangeLog

    r90418 r90419  
    55
    66        * TestResultServer/handlers/testfilehandler.py:
     7
     82011-07-05  Dirk Pranke  <dpranke@chromium.org>
     9
     10        nrwt: allow for multiple http shards
     11        https://bugs.webkit.org/show_bug.cgi?id=63116
     12
     13        Reviewed by Tony Chang.
     14
     15        This modifies the sharding logic to support multiple http
     16        shards, but for now we clamp to one shard until we can test
     17        perf impact and flakiness impact.
     18
     19        * Scripts/webkitpy/layout_tests/layout_package/manager.py:
     20        * Scripts/webkitpy/layout_tests/layout_package/manager_unittest.py:
    721
    8222011-07-05  Dirk Pranke  <dpranke@chromium.org>
  • trunk/Tools/Scripts/webkitpy/layout_tests/layout_package/manager.py

    r90417 r90419  
    238238
    239239
     240class TestShard(object):
     241    """A test shard is a named list of TestInputs."""
     242
     243    # FIXME: Make this class visible, used by workers as well.
     244    def __init__(self, name, test_inputs):
     245        self.name = name
     246        self.test_inputs = test_inputs
     247
     248    def __repr__(self):
     249        return "TestShard(name='%s', test_inputs=%s'" % (self.name, self.test_inputs)
     250
     251    def __eq__(self, other):
     252        return self.name == other.name and self.test_inputs == other.test_inputs
     253
     254
    240255class Manager(object):
    241256    """A class for managing running a series of tests on a series of layout
     
    536551                                               test_expectations.SLOW)
    537552
    538     def _shard_tests(self, test_files, use_real_shards):
     553    def _shard_tests(self, test_files, num_workers, fully_parallel):
    539554        """Groups tests into batches.
    540555        This helps ensure that tests that depend on each other (aka bad tests!)
    541556        continue to run together as most cross-tests dependencies tend to
    542         occur within the same directory. If use_real_shards is False, we
    543         put each (non-HTTP/websocket) test into its own shard for maximum
    544         concurrency instead of trying to do any sort of real sharding.
    545 
     557        occur within the same directory.
    546558        Return:
    547             Two lists of lists of TestInput objects. The first list should
    548             only be run under the server lock, the second can be run whenever.
    549         """
    550         # FIXME: We still need to support multiple locked shards.
     559            Two lists of TestShards. The first contains tests that must only be
     560            run under the server lock, the second can be run whenever.
     561        """
     562
     563        # FIXME: Move all of the sharding logic out of manager into its
     564        # own class or module. Consider grouping it with the chunking logic
     565        # in prepare_lists as well.
     566        if num_workers == 1:
     567            return self._shard_in_two(test_files)
     568        elif fully_parallel:
     569            return self._shard_every_file(test_files)
     570        return self._shard_by_directory(test_files, num_workers)
     571
     572    def _shard_in_two(self, test_files):
     573        """Returns two lists of shards, one with all the tests requiring a lock and one with the rest.
     574
     575        This is used when there's only one worker, to minimize the per-shard overhead."""
     576        locked_inputs = []
     577        unlocked_inputs = []
     578        for test_file in test_files:
     579            test_input = self._get_test_input_for_file(test_file)
     580            if self._test_requires_lock(test_file):
     581                locked_inputs.append(test_input)
     582            else:
     583                unlocked_inputs.append(test_input)
     584        return [TestShard('locked_tests', locked_inputs)], [TestShard('unlocked_tests', unlocked_inputs)]
     585
     586    def _shard_every_file(self, test_files):
     587        """Returns two lists of shards, each shard containing a single test file.
     588
     589        This mode gets maximal parallelism at the cost of much higher flakiness."""
    551590        locked_shards = []
    552591        unlocked_shards = []
    553         tests_to_http_lock = []
    554         if not use_real_shards:
    555             for test_file in test_files:
    556                 test_input = self._get_test_input_for_file(test_file)
    557                 if self._test_requires_lock(test_file):
    558                     tests_to_http_lock.append(test_input)
    559                 else:
    560                     unlocked_shards.append((".", [test_input]))
    561         else:
    562             tests_by_dir = {}
    563             for test_file in test_files:
    564                 directory = self._get_dir_for_test_file(test_file)
    565                 test_input = self._get_test_input_for_file(test_file)
    566                 if self._test_requires_lock(test_file):
    567                     tests_to_http_lock.append(test_input)
    568                 else:
    569                     tests_by_dir.setdefault(directory, [])
    570                     tests_by_dir[directory].append(test_input)
    571             for directory in tests_by_dir:
    572                 test_list = tests_by_dir[directory]
    573                 test_list_tuple = (directory, test_list)
    574                 unlocked_shards.append(test_list_tuple)
    575 
    576             # Sort the shards by directory name.
    577             unlocked_shards.sort(lambda a, b: cmp(a[0], b[0]))
    578 
    579         if tests_to_http_lock:
    580             locked_shards = [("tests_to_http_lock", tests_to_http_lock)]
    581 
    582         return (locked_shards, unlocked_shards)
     592        for test_file in test_files:
     593            test_input = self._get_test_input_for_file(test_file)
     594
     595            # Note that we use a '.' for the shard name; the name doesn't really
     596            # matter, and the only other meaningful value would be the filename,
     597            # which would be really redundant.
     598            if self._test_requires_lock(test_file):
     599                locked_shards.append(TestShard('.', [test_input]))
     600            else:
     601                unlocked_shards.append(TestShard('.', [test_input]))
     602
     603        return locked_shards, unlocked_shards
     604
     605    def _shard_by_directory(self, test_files, num_workers):
     606        """Returns two lists of shards, each shard containing all the files in a directory.
     607
     608        This is the default mode, and gets as much parallelism as we can while
     609        minimizing flakiness caused by inter-test dependencies."""
     610        locked_shards = []
     611        unlocked_shards = []
     612        tests_by_dir = {}
     613        # FIXME: Given that the tests are already sorted by directory,
     614        # we can probably rewrite this to be clearer and faster.
     615        for test_file in test_files:
     616            directory = self._get_dir_for_test_file(test_file)
     617            test_input = self._get_test_input_for_file(test_file)
     618            tests_by_dir.setdefault(directory, [])
     619            tests_by_dir[directory].append(test_input)
     620
     621        for directory, test_inputs in tests_by_dir.iteritems():
     622            shard = TestShard(directory, test_inputs)
     623            if self._test_requires_lock(directory):
     624                locked_shards.append(shard)
     625            else:
     626                unlocked_shards.append(shard)
     627
     628        # Sort the shards by directory name.
     629        locked_shards.sort(key=lambda shard: shard.name)
     630        unlocked_shards.sort(key=lambda shard: shard.name)
     631
     632        return (self._resize_shards(locked_shards, self._max_locked_shards(num_workers),
     633                                    'locked_shard'),
     634                unlocked_shards)
     635
     636    def _max_locked_shards(self, num_workers):
     637        # Put a ceiling on the number of locked shards, so that we
     638        # don't hammer the servers too badly.
     639
     640        # FIXME: For now, limit to one shard. After testing to make sure we
     641        # can handle multiple shards, we should probably do something like
     642        # limit this to no more than a quarter of all workers, e.g.:
     643        # return max(math.ceil(num_workers / 4.0), 1)
     644        return 1
     645
     646    def _resize_shards(self, old_shards, max_new_shards, shard_name_prefix):
     647        """Takes a list of shards and redistributes the tests into no more
     648        than |max_new_shards| new shards."""
     649
     650        # This implementation assumes that each input shard only contains tests from a
     651        # single directory, and that tests in each shard must remain together; as a
     652        # result, a given input shard is never split between output shards.
     653        #
     654        # Each output shard contains the tests from one or more input shards and
     655        # hence may contain tests from multiple directories.
     656
     657        def divide_and_round_up(numerator, divisor):
     658            return int(math.ceil(float(numerator) / divisor))
     659
     660        def extract_and_flatten(shards):
     661            test_inputs = []
     662            for shard in shards:
     663                test_inputs.extend(shard.test_inputs)
     664            return test_inputs
     665
     666        def split_at(seq, index):
     667            return (seq[:index], seq[index:])
     668
     669        num_old_per_new = divide_and_round_up(len(old_shards), max_new_shards)
     670        new_shards = []
     671        remaining_shards = old_shards
     672        while remaining_shards:
     673            some_shards, remaining_shards = split_at(remaining_shards, num_old_per_new)
     674            new_shards.append(TestShard('%s_%d' % (shard_name_prefix, len(new_shards) + 1),
     675                                        extract_and_flatten(some_shards)))
     676        return new_shards
    583677
    584678    def _contains_tests(self, subdir):
     
    588682        return False
    589683
    590     def _num_workers(self, num_shards):
    591         num_workers = min(int(self._options.child_processes), num_shards)
     684    def _log_num_workers(self, num_workers, num_shards, num_locked_shards):
    592685        driver_name = self._port.driver_name()
    593686        if num_workers == 1:
     
    595688                (driver_name, grammar.pluralize('shard', num_shards)))
    596689        else:
    597             self._printer.print_config("Running %d %ss in parallel over %d shards" %
    598                 (num_workers, driver_name, num_shards))
    599         return num_workers
     690            self._printer.print_config("Running %d %ss in parallel over %d shards (%d locked)" %
     691                (num_workers, driver_name, num_shards, num_locked_shards))
    600692
    601693    def _run_tests(self, file_list, result_summary):
     
    626718        self._printer.print_update('Sharding tests ...')
    627719        locked_shards, unlocked_shards = self._shard_tests(file_list,
    628             int(self._options.child_processes) > 1 and not self._options.experimental_fully_parallel)
     720            int(self._options.child_processes), self._options.experimental_fully_parallel)
    629721
    630722        # FIXME: We don't have a good way to coordinate the workers so that
     
    641733            self.start_servers_with_lock()
    642734
    643         num_workers = self._num_workers(len(all_shards))
     735        num_workers = min(int(self._options.child_processes), len(all_shards))
     736        self._log_num_workers(num_workers, len(all_shards), len(locked_shards))
     737
    644738        manager_connection = manager_worker_broker.get(self._port, self._options,
    645739                                                       self, worker.Worker)
     
    664758        self._printer.print_update("Starting testing ...")
    665759        for shard in all_shards:
    666             manager_connection.post_message('test_list', shard[0], shard[1])
     760            # FIXME: Change 'test_list' to 'shard', make sharding public.
     761            manager_connection.post_message('test_list', shard.name, shard.test_inputs)
    667762
    668763        # We post one 'stop' message for each worker. Because the stop message
     
    13471442        def find(name, test_lists):
    13481443            for i in range(len(test_lists)):
    1349                 if test_lists[i][0] == name:
     1444                if test_lists[i].name == name:
    13501445                    return i
    13511446            return -1
  • trunk/Tools/Scripts/webkitpy/layout_tests/layout_package/manager_unittest.py

    r90417 r90419  
    4040from webkitpy import layout_tests
    4141from webkitpy.layout_tests import run_webkit_tests
    42 from webkitpy.layout_tests.layout_package.manager import Manager, natural_sort_key, path_key, TestRunInterruptedException
     42from webkitpy.layout_tests.layout_package.manager import Manager, natural_sort_key, path_key, TestRunInterruptedException, TestShard
    4343from webkitpy.layout_tests.layout_package import printing
    4444from webkitpy.layout_tests.layout_package.result_summary import ResultSummary
     
    5151
    5252
    53 class ManagerTest(unittest.TestCase):
    54     def test_shard_tests(self):
    55         # Test that _shard_tests in test_runner.TestRunner really
    56         # put the http tests first in the queue.
     53class ShardingTests(unittest.TestCase):
     54    # FIXME: Remove "LayoutTests" from this if we can ever convert the generic
     55    # code from filenames to test names.
     56    test_list = [
     57        "LayoutTests/http/tests/websocket/tests/unicode.htm",
     58        "LayoutTests/animations/keyframes.html",
     59        "LayoutTests/http/tests/security/view-source-no-refresh.html",
     60        "LayoutTests/http/tests/websocket/tests/websocket-protocol-ignored.html",
     61        "LayoutTests/fast/css/display-none-inline-style-change-crash.html",
     62        "LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html",
     63        "LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html",
     64        "LayoutTests/ietestcenter/Javascript/11.1.5_4-4-c-1.html",
     65        "LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html",
     66    ]
     67
     68    def get_shards(self, num_workers, fully_parallel):
    5769        port = Mock()
    5870        port._filesystem = filesystem_mock.MockFileSystem()
    59         manager = ManagerWrapper(port=port, options=Mock(), printer=Mock())
    60 
    61         test_list = [
    62           "LayoutTests/websocket/tests/unicode.htm",
    63           "LayoutTests/animations/keyframes.html",
    64           "LayoutTests/http/tests/security/view-source-no-refresh.html",
    65           "LayoutTests/websocket/tests/websocket-protocol-ignored.html",
    66           "LayoutTests/fast/css/display-none-inline-style-change-crash.html",
    67           "LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html",
    68           "LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html",
    69           "LayoutTests/ietestcenter/Javascript/11.1.5_4-4-c-1.html",
    70           "LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html",
    71         ]
    72 
    73         expected_tests_to_http_lock = set([
    74           'LayoutTests/websocket/tests/unicode.htm',
    75           'LayoutTests/http/tests/security/view-source-no-refresh.html',
    76           'LayoutTests/websocket/tests/websocket-protocol-ignored.html',
    77           'LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html',
    78         ])
    79 
    80         single_locked, single_unlocked = manager._shard_tests(test_list, False)
    81         multi_locked, multi_unlocked = manager._shard_tests(test_list, True)
    82 
    83         self.assertEqual("tests_to_http_lock", single_locked[0][0])
    84         self.assertEqual(expected_tests_to_http_lock, set(single_locked[0][1]))
    85         self.assertEqual("tests_to_http_lock", multi_locked[0][0])
    86         self.assertEqual(expected_tests_to_http_lock, set(multi_locked[0][1]))
     71        self.manager = ManagerWrapper(port=port, options=Mock(), printer=Mock())
     72        return self.manager._shard_tests(self.test_list, num_workers, fully_parallel)
     73
     74    def test_shard_by_dir(self):
     75        locked, unlocked = self.get_shards(num_workers=2, fully_parallel=False)
     76
     77        # Note that although there are tests in multiple dirs that need locks,
     78        # they are crammed into a single shard in order to reduce the # of
     79        # workers hitting the server at once.
     80        self.assertEquals(locked,
     81            [TestShard('locked_shard_1',
     82              ['LayoutTests/http/tests/security/view-source-no-refresh.html',
     83               'LayoutTests/http/tests/websocket/tests/unicode.htm',
     84               'LayoutTests/http/tests/websocket/tests/websocket-protocol-ignored.html',
     85               'LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html'])])
     86        self.assertEquals(unlocked,
     87            [TestShard('animations',
     88                       ['LayoutTests/animations/keyframes.html']),
     89             TestShard('dom/html/level2/html',
     90                       ['LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html',
     91                        'LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html']),
     92             TestShard('fast/css',
     93                       ['LayoutTests/fast/css/display-none-inline-style-change-crash.html']),
     94             TestShard('ietestcenter/Javascript',
     95                       ['LayoutTests/ietestcenter/Javascript/11.1.5_4-4-c-1.html'])])
     96
     97    def test_shard_every_file(self):
     98        locked, unlocked = self.get_shards(num_workers=2, fully_parallel=True)
     99        self.assertEquals(locked,
     100            [TestShard('.', ['LayoutTests/http/tests/websocket/tests/unicode.htm']),
     101             TestShard('.', ['LayoutTests/http/tests/security/view-source-no-refresh.html']),
     102             TestShard('.', ['LayoutTests/http/tests/websocket/tests/websocket-protocol-ignored.html']),
     103             TestShard('.', ['LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html'])])
     104        self.assertEquals(unlocked,
     105            [TestShard('.', ['LayoutTests/animations/keyframes.html']),
     106             TestShard('.', ['LayoutTests/fast/css/display-none-inline-style-change-crash.html']),
     107             TestShard('.', ['LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html']),
     108             TestShard('.', ['LayoutTests/ietestcenter/Javascript/11.1.5_4-4-c-1.html']),
     109             TestShard('.', ['LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html'])])
     110
     111    def test_shard_in_two(self):
     112        locked, unlocked = self.get_shards(num_workers=1, fully_parallel=False)
     113        self.assertEquals(locked,
     114            [TestShard('locked_tests',
     115                       ['LayoutTests/http/tests/websocket/tests/unicode.htm',
     116                        'LayoutTests/http/tests/security/view-source-no-refresh.html',
     117                        'LayoutTests/http/tests/websocket/tests/websocket-protocol-ignored.html',
     118                        'LayoutTests/http/tests/xmlhttprequest/supported-xml-content-types.html'])])
     119        self.assertEquals(unlocked,
     120            [TestShard('unlocked_tests',
     121                       ['LayoutTests/animations/keyframes.html',
     122                        'LayoutTests/fast/css/display-none-inline-style-change-crash.html',
     123                        'LayoutTests/dom/html/level2/html/HTMLAnchorElement03.html',
     124                        'LayoutTests/ietestcenter/Javascript/11.1.5_4-4-c-1.html',
     125                        'LayoutTests/dom/html/level2/html/HTMLAnchorElement06.html'])])
    87126
    88127    def test_http_locking(tester):
     
    94133            def handle_finished_list(self, source, list_name, num_tests, elapsed_time):
    95134                if not self._finished_list_called:
    96                     tester.assertEquals(list_name, 'tests_to_http_lock')
     135                    tester.assertEquals(list_name, 'locked_tests')
    97136                    tester.assertTrue(self._remaining_locked_shards)
    98137                    tester.assertTrue(self._has_http_lock)
Note: See TracChangeset for help on using the changeset viewer.