Context Navigation

← Previous Changeset
Next Changeset →

Changeset 176211 in webkit

Timestamp:

Nov 17, 2014 11:06:38 AM (9 years ago)

Author:

commit-queue@webkit.org

Message:

Having 30+ flaky failures breaks EWS
https://bugs.webkit.org/show_bug.cgi?id=138743

Patch by Jake Nielsen <jacob_nielsen@apple.com> on 2014-11-17
Reviewed by Alexey Proskuryakov.

Adds tests to ensure that the problem has been solved.

Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py:

(test_first_failure_limit):
(test_first_failure_limit_with_some_tree_redness):
(test_second_failure_limit):
(test_tree_failure_limit_with_patch_that_potentially_fixes_some_redness):
(test_first_and_second_failure_limit):
(test_first_and_clean_failure_limit):
(test_first_second_and_clean_failure_limit):
(test_very_red_tree_retry): Deleted.
Really this was renamed to test_first_second_and_clean_failure_limit.

Scripts/webkitpy/tool/bot/patchanalysistask.py:

Makes the appropriate changes to PatchAnalysisTask to make sure that
even when the first test run hits the failure limit, it will still try
a second run.
(PatchAnalysisTask._results_failed_different_tests):
(PatchAnalysisTask._test_patch):
(PatchAnalysisTask._continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try): Deleted.

Location:

trunk/Tools

Files:

: 3 edited

ChangeLog (modified) (1 diff)
Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py (modified) (2 diffs)
Scripts/webkitpy/tool/bot/patchanalysistask.py (modified) (3 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/Tools/ChangeLog

-                      r176198
+                      r176211
+2014-11-17  Jake Nielsen  <jacob_nielsen@apple.com>
+        Having 30+ flaky failures breaks EWS
+        https://bugs.webkit.org/show_bug.cgi?id=138743
+        Reviewed by Alexey Proskuryakov.
+        Adds tests to ensure that the problem has been solved.
+        * Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py:
+        (test_first_failure_limit):
+        (test_first_failure_limit_with_some_tree_redness):
+        (test_second_failure_limit):
+        (test_tree_failure_limit_with_patch_that_potentially_fixes_some_redness):
+        (test_first_and_second_failure_limit):
+        (test_first_and_clean_failure_limit):
+        (test_first_second_and_clean_failure_limit):
+        (test_very_red_tree_retry): Deleted.
+        Really this was renamed to test_first_second_and_clean_failure_limit.
+        * Scripts/webkitpy/tool/bot/patchanalysistask.py:
+        Makes the appropriate changes to PatchAnalysisTask to make sure that
+        even when the first test run hits the failure limit, it will still try
+        a second run.
+        (PatchAnalysisTask._results_failed_different_tests):
+        (PatchAnalysisTask._test_patch):
+        (PatchAnalysisTask._continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try): Deleted.
 2014-11-17  Ting-Wei Lan  <lantw44@gmail.com>

trunk/Tools/Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py

-                      r175735
+                      r176211
+_lots_of_failing_tests = map(lambda num: "test-%s.html" % num, range(0, 100))
 class CommitQueueTaskTest(unittest.TestCase):
     def _run_and_expect_patch_analysis_result(self, commit_queue, expected_analysis_result, expected_reported_flaky_tests=[], expect_clean_tests_to_run=False, expected_failure_status_id=0):
 …
         self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.PASS, expect_clean_tests_to_run=True)
-    def test_very_red_tree_retry(self):
-        lots_of_failing_tests = map(lambda num: "test-%s.html" % num, range(0, 100))
-        commit_queue = MockSimpleTestPlanCommitQueue(
-            first_test_failures=lots_of_failing_tests,
-            second_test_failures=lots_of_failing_tests,
-            clean_test_failures=lots_of_failing_tests)
+    def test_first_failure_limit(self):
+        commit_queue = MockSimpleTestPlanCommitQueue(
+            first_test_failures=_lots_of_failing_tests,
+            second_test_failures=[],
+            clean_test_failures=[])
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True, expected_failure_status_id=1)
+    def test_first_failure_limit_with_some_tree_redness(self):
+        commit_queue = MockSimpleTestPlanCommitQueue(
+            first_test_failures=_lots_of_failing_tests,
+            second_test_failures=["Fail1", "Fail2", "Fail3"],
+            clean_test_failures=["Fail1", "Fail2", "Fail3"])
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True, expected_failure_status_id=1)
+    def test_second_failure_limit(self):
+        # There need to be some failures in the first set of tests, or it won't even make it to the second test.
+        commit_queue = MockSimpleTestPlanCommitQueue(
+            first_test_failures=["Fail1", "Fail2", "Fail3"],
+            second_test_failures=_lots_of_failing_tests,
+            clean_test_failures=["Fail1", "Fail2", "Fail3"])
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True, expected_failure_status_id=2)
+    def test_tree_failure_limit_with_patch_that_potentially_fixes_some_redness(self):
+        commit_queue = MockSimpleTestPlanCommitQueue(
+            first_test_failures=["Fail1", "Fail2", "Fail3"],
+            second_test_failures=["Fail1", "Fail2", "Fail3"],
+            clean_test_failures=_lots_of_failing_tests)
+        # Unfortunately there are cases where the clean build will randomly fail enough tests to hit the failure limit.
+        # With that in mind, we can't actually know that this patch is good or bad until we see a clean run that doesn't
+        # exceed the failure limit.
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True)
+    def test_first_and_second_failure_limit(self):
+        commit_queue = MockSimpleTestPlanCommitQueue(
+            first_test_failures=_lots_of_failing_tests,
+            second_test_failures=_lots_of_failing_tests,
+            clean_test_failures=[])
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.FAIL, expect_clean_tests_to_run=True, expected_failure_status_id=1)
+    def test_first_and_clean_failure_limit(self):
+        commit_queue = MockSimpleTestPlanCommitQueue(
+            first_test_failures=_lots_of_failing_tests,
+            second_test_failures=[],
+            clean_test_failures=_lots_of_failing_tests)
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True)
+    def test_first_second_and_clean_failure_limit(self):
+        commit_queue = MockSimpleTestPlanCommitQueue(
+            first_test_failures=_lots_of_failing_tests,
+            second_test_failures=_lots_of_failing_tests,
+            clean_test_failures=_lots_of_failing_tests)
         self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True)

trunk/Tools/Scripts/webkitpy/tool/bot/patchanalysistask.py

-                      r175735
+                      r176211
         return first_failing_tests != second_failing_tests
-    def _continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try(self, results, results_archive, script_error):
-        self._build_and_test_without_patch()
-        # If we've made it here, then many (500) tests are failing with the patch applied, but
-        # if the clean tree is also failing many tests, even if it's not quite as many (495),
-        # then we can't be certain that the discrepancy isn't due to flakiness, and hence we must
-        # defer judgement.
-        if (len(results.failing_tests()) - len(self._delegate.test_results().failing_tests())) <= 5:
-            return False
-        return self.report_failure(results_archive, results, script_error)
     def _should_defer_patch_or_throw(self, failures_with_patch, results_archive_for_failures_with_patch, script_error, failure_id):
         self._build_and_test_without_patch()
 …
         first_failure_status_id = self.failure_status_id
-        if first_results.did_exceed_test_failure_limit():
-            return self._continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try(first_results, first_results_archive, first_script_error)
-        if self._test():
+        if self._test() and not first_results.did_exceed_test_failure_limit():
             # Only report flaky tests if we were successful at parsing results.json and archiving results.
             if first_results and first_results_archive:
 …
         second_failure_status_id = self.failure_status_id
+        if second_results.did_exceed_test_failure_limit() and first_results.did_exceed_test_failure_limit():
+            self._build_and_test_without_patch()
+            clean_tree_results = self._delegate.test_results()
+            if (len(first_results.failing_tests()) - len(clean_tree_results.failing_tests())) <= 5:
+                return False
+            self.failure_status_id = first_failure_status_id
+            return self.report_failure(first_results_archive, first_results, first_script_error)
         if second_results.did_exceed_test_failure_limit():
-            return self._continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try(second_results, second_results_archive, second_script_error)
+            self._should_defer_patch_or_throw(first_results.failing_test_results(), first_results_archive, first_script_error, first_failure_status_id)
+            return False
+        if first_results.did_exceed_test_failure_limit():
+            self._should_defer_patch_or_throw(second_results.failing_test_results(), second_results_archive, second_script_error, second_failure_status_id)
+            return False
         if self._results_failed_different_tests(first_results, second_results):

Note: See TracChangeset for help on using the changeset viewer.