Changeset 176211 in webkit


Ignore:
Timestamp:
Nov 17, 2014 11:06:38 AM (9 years ago)
Author:
commit-queue@webkit.org
Message:

Having 30+ flaky failures breaks EWS
https://bugs.webkit.org/show_bug.cgi?id=138743

Patch by Jake Nielsen <jacob_nielsen@apple.com> on 2014-11-17
Reviewed by Alexey Proskuryakov.

Adds tests to ensure that the problem has been solved.

  • Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py:

(test_first_failure_limit):
(test_first_failure_limit_with_some_tree_redness):
(test_second_failure_limit):
(test_tree_failure_limit_with_patch_that_potentially_fixes_some_redness):
(test_first_and_second_failure_limit):
(test_first_and_clean_failure_limit):
(test_first_second_and_clean_failure_limit):
(test_very_red_tree_retry): Deleted.
In effect, test_very_red_tree_retry was renamed to test_first_second_and_clean_failure_limit rather than deleted.

  • Scripts/webkitpy/tool/bot/patchanalysistask.py:

Makes the appropriate changes to PatchAnalysisTask to make sure that
even when the first test run hits the failure limit, it will still try
a second run.
(PatchAnalysisTask._results_failed_different_tests):
(PatchAnalysisTask._test_patch):
(PatchAnalysisTask._continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try): Deleted.

Location:
trunk/Tools
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/Tools/ChangeLog

    r176198 r176211  
     1 2014-11-17  Jake Nielsen  <jacob_nielsen@apple.com>
     2
     3        Having 30+ flaky failures breaks EWS
     4        https://bugs.webkit.org/show_bug.cgi?id=138743
     5
     6        Reviewed by Alexey Proskuryakov.
     7
     8        Adds tests to ensure that the problem has been solved.
     9        * Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py:
     10        (test_first_failure_limit):
     11        (test_first_failure_limit_with_some_tree_redness):
     12        (test_second_failure_limit):
     13        (test_tree_failure_limit_with_patch_that_potentially_fixes_some_redness):
     14        (test_first_and_second_failure_limit):
     15        (test_first_and_clean_failure_limit):
     16        (test_first_second_and_clean_failure_limit):
     17        (test_very_red_tree_retry): Deleted.
     18        In effect, test_very_red_tree_retry was renamed to test_first_second_and_clean_failure_limit rather than deleted.
     19        * Scripts/webkitpy/tool/bot/patchanalysistask.py:
     20        Makes the appropriate changes to PatchAnalysisTask to make sure that
     21        even when the first test run hits the failure limit, it will still try
     22        a second run.
     23        (PatchAnalysisTask._results_failed_different_tests):
     24        (PatchAnalysisTask._test_patch):
     25        (PatchAnalysisTask._continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try): Deleted.
     26
    1 27 2014-11-17  Ting-Wei Lan  <lantw44@gmail.com>
    2 28
  • trunk/Tools/Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py

    r175735 r176211  
    160 160
    161 161
     162 _lots_of_failing_tests = map(lambda num: "test-%s.html" % num, range(0, 100))
     163
     164
    162 165 class CommitQueueTaskTest(unittest.TestCase):
    163 166    def _run_and_expect_patch_analysis_result(self, commit_queue, expected_analysis_result, expected_reported_flaky_tests=[], expect_clean_tests_to_run=False, expected_failure_status_id=0):
     
    399 402        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.PASS, expect_clean_tests_to_run=True)
    400 403
    401     def test_very_red_tree_retry(self):
    402         lots_of_failing_tests = map(lambda num: "test-%s.html" % num, range(0, 100))
    403         commit_queue = MockSimpleTestPlanCommitQueue(
    404             first_test_failures=lots_of_failing_tests,
    405             second_test_failures=lots_of_failing_tests,
    406             clean_test_failures=lots_of_failing_tests)
     404    def test_first_failure_limit(self):
     405        commit_queue = MockSimpleTestPlanCommitQueue(
     406            first_test_failures=_lots_of_failing_tests,
     407            second_test_failures=[],
     408            clean_test_failures=[])
     409
     410        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True, expected_failure_status_id=1)
     411
     412    def test_first_failure_limit_with_some_tree_redness(self):
     413        commit_queue = MockSimpleTestPlanCommitQueue(
     414            first_test_failures=_lots_of_failing_tests,
     415            second_test_failures=["Fail1", "Fail2", "Fail3"],
     416            clean_test_failures=["Fail1", "Fail2", "Fail3"])
     417
     418        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True, expected_failure_status_id=1)
     419
     420    def test_second_failure_limit(self):
     421        # There need to be some failures in the first set of tests, or it won't even make it to the second test.
     422        commit_queue = MockSimpleTestPlanCommitQueue(
     423            first_test_failures=["Fail1", "Fail2", "Fail3"],
     424            second_test_failures=_lots_of_failing_tests,
     425            clean_test_failures=["Fail1", "Fail2", "Fail3"])
     426
     427        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True, expected_failure_status_id=2)
     428
     429    def test_tree_failure_limit_with_patch_that_potentially_fixes_some_redness(self):
     430        commit_queue = MockSimpleTestPlanCommitQueue(
     431            first_test_failures=["Fail1", "Fail2", "Fail3"],
     432            second_test_failures=["Fail1", "Fail2", "Fail3"],
     433            clean_test_failures=_lots_of_failing_tests)
     434
     435        # Unfortunately there are cases where the clean build will randomly fail enough tests to hit the failure limit.
     436        # With that in mind, we can't actually know that this patch is good or bad until we see a clean run that doesn't
     437        # exceed the failure limit.
     438        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True)
     439
     440    def test_first_and_second_failure_limit(self):
     441        commit_queue = MockSimpleTestPlanCommitQueue(
     442            first_test_failures=_lots_of_failing_tests,
     443            second_test_failures=_lots_of_failing_tests,
     444            clean_test_failures=[])
     445
     446        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.FAIL, expect_clean_tests_to_run=True, expected_failure_status_id=1)
     447
     448    def test_first_and_clean_failure_limit(self):
     449        commit_queue = MockSimpleTestPlanCommitQueue(
     450            first_test_failures=_lots_of_failing_tests,
     451            second_test_failures=[],
     452            clean_test_failures=_lots_of_failing_tests)
     453
     454        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True)
     455
     456    def test_first_second_and_clean_failure_limit(self):
     457        commit_queue = MockSimpleTestPlanCommitQueue(
     458            first_test_failures=_lots_of_failing_tests,
     459            second_test_failures=_lots_of_failing_tests,
     460            clean_test_failures=_lots_of_failing_tests)
    407 461
    408 462        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True)
  • trunk/Tools/Scripts/webkitpy/tool/bot/patchanalysistask.py

    r175735 r176211  
    183 183        return first_failing_tests != second_failing_tests
    184 184
    185     def _continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try(self, results, results_archive, script_error):
    186         self._build_and_test_without_patch()
    187 
    188         # If we've made it here, then many (500) tests are failing with the patch applied, but
    189         # if the clean tree is also failing many tests, even if it's not quite as many (495),
    190         # then we can't be certain that the discrepancy isn't due to flakiness, and hence we must
    191         # defer judgement.
    192         if (len(results.failing_tests()) - len(self._delegate.test_results().failing_tests())) <= 5:
    193             return False
    194 
    195         return self.report_failure(results_archive, results, script_error)
    196 
    197 185    def _should_defer_patch_or_throw(self, failures_with_patch, results_archive_for_failures_with_patch, script_error, failure_id):
    198 186        self._build_and_test_without_patch()
     
    224 212        first_failure_status_id = self.failure_status_id
    225 213
    226         if first_results.did_exceed_test_failure_limit():
    227             return self._continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try(first_results, first_results_archive, first_script_error)
    228 
    229         if self._test():
     214        if self._test() and not first_results.did_exceed_test_failure_limit():
    230 215            # Only report flaky tests if we were successful at parsing results.json and archiving results.
    231 216            if first_results and first_results_archive:
     
    238 223        second_failure_status_id = self.failure_status_id
    239 224
     225        if second_results.did_exceed_test_failure_limit() and first_results.did_exceed_test_failure_limit():
     226            self._build_and_test_without_patch()
     227            clean_tree_results = self._delegate.test_results()
     228
     229            if (len(first_results.failing_tests()) - len(clean_tree_results.failing_tests())) <= 5:
     230                return False
     231
     232            self.failure_status_id = first_failure_status_id
     233
     234            return self.report_failure(first_results_archive, first_results, first_script_error)
     235
    240 236        if second_results.did_exceed_test_failure_limit():
    241             return self._continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try(second_results, second_results_archive, second_script_error)
     237            self._should_defer_patch_or_throw(first_results.failing_test_results(), first_results_archive, first_script_error, first_failure_status_id)
     238            return False
     239
     240        if first_results.did_exceed_test_failure_limit():
     241            self._should_defer_patch_or_throw(second_results.failing_test_results(), second_results_archive, second_script_error, second_failure_status_id)
     242            return False
    242 243
    243 244        if self._results_failed_different_tests(first_results, second_results):
Note: See TracChangeset for help on using the changeset viewer.