Context Navigation

← Previous Changeset
Next Changeset →

Changeset 74408 in webkit

Timestamp:

Dec 21, 2010 4:55:45 AM (13 years ago)

Author:

eric@webkit.org

Message:

2010-12-21 Eric Seidel <eric@webkit.org>

Reviewed by Adam Barth.

commit-queue will report constant failures as flaky if other tests flake
https://bugs.webkit.org/show_bug.cgi?id=51272

This patch just removes functionality and adds testing.
Previously we attempted to report flaky tests when we had
two different tests fail in a row. However, since we stop
running the tests at the first failure, our code was wrong in
trying to determine flakiness from the incomplete runs.

Originally I posted an alternate patch:
https://bug-51272-attachments.webkit.org/attachment.cgi?id=77078
which fixed our flaky logic in this case; however, it was decided
that that patch would be too difficult to maintain, so now
I'm just removing the broken logic.

This will dramatically cut down on our flaky-test false positives
at the (small) cost of the queues being unable to report
any flakiness if the tree is very flaky. (With at least one test
flaking on every run, we'll never report failures anymore.) I think
this is a tradeoff worth making.

Scripts/webkitpy/tool/bot/commitqueuetask.py:
Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py:

Location:

trunk/Tools

Files:

3 edited

ChangeLog (modified) (1 diff)
Scripts/webkitpy/tool/bot/commitqueuetask.py (modified) (2 diffs)
Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py (modified) (7 diffs)

Legend:

(no marker): Unmodified
+: Added
-: Removed

trunk/Tools/ChangeLog

-                      r74403
+                      r74408
+2010-12-21  Eric Seidel  <eric@webkit.org>
+        Reviewed by Adam Barth.
+        commit-queue will report constant failures as flaky if other tests flake
+        https://bugs.webkit.org/show_bug.cgi?id=51272
+        This patch just removes functionality and adds testing.
+        Previously we attempted to report flaky tests when we had
+        two different tests fail in a row.  However, since we stop
+        running the tests at the first failure, our code was wrong in
+        trying to determine flakiness from the incomplete runs.
+        Originally I posted an alternate patch:
+        https://bug-51272-attachments.webkit.org/attachment.cgi?id=77078
+        which fixed our flaky logic in this case, however it was decided
+        that that patch would be too difficult to maintain, so now
+        I'm just removing the broken logic.
+        This will dramatically cut-down on our flaky-test false positives
+        at the (small) cost of the queues being unable to report
+        any flakiness if the tree is very flaky.  (With at least one test
+        flaking on every run, we'll never report failures anymore.)  I think
+        this is a tradeoff worth making.
+        * Scripts/webkitpy/tool/bot/commitqueuetask.py:
+        * Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py:
 2010-12-20  Eric Seidel  <eric@webkit.org>

trunk/Tools/Scripts/webkitpy/tool/bot/commitqueuetask.py

-                      r74403
+                      r74408
         second_failing_tests = self._failing_tests_from_last_run()
         if first_failing_tests != second_failing_tests:
-            self._report_flaky_tests(first_failing_tests + second_failing_tests)
+            # We could report flaky tests here, but since run-webkit-tests
+            # is run with --exit-after-N-failures=1, we would need to
+            # be careful not to report constant failures as flaky due to earlier
+            # flaky test making them not fail (no results) in one of the runs.
+            # See https://bugs.webkit.org/show_bug.cgi?id=51272
             return False
 …
         if not self._validate():
             return False
+        # FIXME: We should understand why the land failure occurred and retry if possible.
         if not self._land():
             raise self._script_error

trunk/Tools/Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py

-                      r74403
+                      r74408
 class CommitQueueTaskTest(unittest.TestCase):
-    def _run_through_task(self, commit_queue, expected_stderr, expected_exception=None):
+    def _run_through_task(self, commit_queue, expected_stderr, expected_exception=None, expect_retry=False):
         tool = MockTool(log_executive=True)
         patch = tool.bugs.fetch_attachment(197)
         task = CommitQueueTask(commit_queue, patch)
-        OutputCapture().assert_outputs(self, task.run, expected_stderr=expected_stderr, expected_exception=expected_exception)
+        success = OutputCapture().assert_outputs(self, task.run, expected_stderr=expected_stderr, expected_exception=expected_exception)
+        if not expected_exception:
+            self.assertEqual(success, not expect_retry)
     def test_success_case(self):
 …
 command_failed: failure_message='Unable to clean working directory' script_error='MOCK clean failure' patch='197'
 """
-        self._run_through_task(commit_queue, expected_stderr)
+        self._run_through_task(commit_queue, expected_stderr, expect_retry=True)
     def test_update_failure(self):
 …
 command_failed: failure_message='Unable to update working directory' script_error='MOCK update failure' patch='197'
 """
-        self._run_through_task(commit_queue, expected_stderr)
+        self._run_through_task(commit_queue, expected_stderr, expect_retry=True)
     def test_apply_failure(self):
 …
 command_failed: failure_message='Unable to build without patch' script_error='MOCK clean build failure' patch='197'
 """
-        self._run_through_task(commit_queue, expected_stderr)
+        self._run_through_task(commit_queue, expected_stderr, expect_retry=True)
     def test_flaky_test_failure(self):
 …
         self._run_through_task(commit_queue, expected_stderr)
+    _double_flaky_test_counter = 0
+    def test_double_flaky_test_failure(self):
+        commit_queue = MockCommitQueue([
+            None,
+            None,
+            None,
+            None,
+            ScriptError("MOCK test failure"),
+            ScriptError("MOCK test failure again"),
+        ])
+        # The (subtle) point of this test is that report_flaky_tests does not appear
+        # in the expected_stderr for this run.
+        # Note also that there is no attempt to run the tests w/o the patch.
+        expected_stderr = """run_webkit_patch: ['clean']
+command_passed: success_message='Cleaned working directory' patch='197'
+run_webkit_patch: ['update']
+command_passed: success_message='Updated working directory' patch='197'
+run_webkit_patch: ['apply-attachment', '--no-update', '--non-interactive', 197]
+command_passed: success_message='Applied patch' patch='197'
+run_webkit_patch: ['build', '--no-clean', '--no-update', '--build-style=both']
+command_passed: success_message='Built patch' patch='197'
+run_webkit_patch: ['build-and-test', '--no-clean', '--no-update', '--test', '--non-interactive']
+command_failed: failure_message='Patch does not pass tests' script_error='MOCK test failure' patch='197'
+run_webkit_patch: ['build-and-test', '--no-clean', '--no-update', '--test', '--non-interactive']
+command_failed: failure_message='Patch does not pass tests' script_error='MOCK test failure again' patch='197'
+"""
+        tool = MockTool(log_executive=True)
+        patch = tool.bugs.fetch_attachment(197)
+        task = CommitQueueTask(commit_queue, patch)
+        self._double_flaky_test_counter = 0
+        def mock_failing_tests_from_last_run():
+            CommitQueueTaskTest._double_flaky_test_counter += 1
+            if CommitQueueTaskTest._double_flaky_test_counter % 2:
+                return ['foo.html']
+            return ['bar.html']
+        task._failing_tests_from_last_run = mock_failing_tests_from_last_run
+        success = OutputCapture().assert_outputs(self, task.run, expected_stderr=expected_stderr)
+        self.assertEqual(success, False)
     def test_test_failure(self):
         commit_queue = MockCommitQueue([
 …
 command_failed: failure_message='Unable to pass tests without patch (tree is red?)' script_error='MOCK clean test failure' patch='197'
 """
-        self._run_through_task(commit_queue, expected_stderr)
+        self._run_through_task(commit_queue, expected_stderr, expect_retry=True)
     def test_land_failure(self):
 …
 command_failed: failure_message='Unable to land patch' script_error='MOCK land failure' patch='197'
 """
+        # FIXME: This should really be expect_retry=True for a better user experience.
         self._run_through_task(commit_queue, expected_stderr, ScriptError)

Note: See TracChangeset for help on using the changeset viewer.