Changeset 176211 in webkit
- Timestamp:
- Nov 17, 2014 11:06:38 AM (9 years ago)
- Location:
- trunk/Tools
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Tools/ChangeLog
r176198 r176211 1 2014-11-17 Jake Nielsen <jacob_nielsen@apple.com> 2 3 Having 30+ flaky failures breaks EWS 4 https://bugs.webkit.org/show_bug.cgi?id=138743 5 6 Reviewed by Alexey Proskuryakov. 7 8 Adds tests to ensure that the problem has been solved. 9 * Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py: 10 (test_first_failure_limit): 11 (test_first_failure_limit_with_some_tree_redness): 12 (test_second_failure_limit): 13 (test_tree_failure_limit_with_patch_that_potentially_fixes_some_redness): 14 (test_first_and_second_failure_limit): 15 (test_first_and_clean_failure_limit): 16 (test_first_second_and_clean_failure_limit): 17 (test_very_red_tree_retry): Deleted. 18 Really this was renamed to test_first_second_and_clean_failure_limit. 19 * Scripts/webkitpy/tool/bot/patchanalysistask.py: 20 Makes the appropriate changes to PatchAnalysisTask to make sure that 21 even when the first test run hits the failure limit, it will still try 22 a second run. 23 (PatchAnalysisTask._results_failed_different_tests): 24 (PatchAnalysisTask._test_patch): 25 (PatchAnalysisTask._continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try): Deleted. 26 1 27 2014-11-17 Ting-Wei Lan <lantw44@gmail.com> 2 28 -
trunk/Tools/Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py
r175735 r176211 160 160 161 161 162 _lots_of_failing_tests = map(lambda num: "test-%s.html" % num, range(0, 100)) 163 164 162 165 class CommitQueueTaskTest(unittest.TestCase): 163 166 def _run_and_expect_patch_analysis_result(self, commit_queue, expected_analysis_result, expected_reported_flaky_tests=[], expect_clean_tests_to_run=False, expected_failure_status_id=0): … … 399 402 self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.PASS, expect_clean_tests_to_run=True) 400 403 401 def test_very_red_tree_retry(self): 402 lots_of_failing_tests = map(lambda num: "test-%s.html" % num, range(0, 100)) 403 commit_queue = MockSimpleTestPlanCommitQueue( 404 first_test_failures=lots_of_failing_tests, 405 second_test_failures=lots_of_failing_tests, 406 clean_test_failures=lots_of_failing_tests) 404 def test_first_failure_limit(self): 405 commit_queue = MockSimpleTestPlanCommitQueue( 406 first_test_failures=_lots_of_failing_tests, 407 second_test_failures=[], 408 clean_test_failures=[]) 409 410 self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True, expected_failure_status_id=1) 411 412 def test_first_failure_limit_with_some_tree_redness(self): 413 commit_queue = MockSimpleTestPlanCommitQueue( 414 first_test_failures=_lots_of_failing_tests, 415 second_test_failures=["Fail1", "Fail2", "Fail3"], 416 clean_test_failures=["Fail1", "Fail2", "Fail3"]) 417 418 self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True, expected_failure_status_id=1) 419 420 def test_second_failure_limit(self): 421 # There need to be some failures in the first set of tests, or it won't even make it to the second test. 
422 commit_queue = MockSimpleTestPlanCommitQueue( 423 first_test_failures=["Fail1", "Fail2", "Fail3"], 424 second_test_failures=_lots_of_failing_tests, 425 clean_test_failures=["Fail1", "Fail2", "Fail3"]) 426 427 self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True, expected_failure_status_id=2) 428 429 def test_tree_failure_limit_with_patch_that_potentially_fixes_some_redness(self): 430 commit_queue = MockSimpleTestPlanCommitQueue( 431 first_test_failures=["Fail1", "Fail2", "Fail3"], 432 second_test_failures=["Fail1", "Fail2", "Fail3"], 433 clean_test_failures=_lots_of_failing_tests) 434 435 # Unfortunately there are cases where the clean build will randomly fail enough tests to hit the failure limit. 436 # With that in mind, we can't actually know that this patch is good or bad until we see a clean run that doesn't 437 # exceed the failure limit. 438 self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True) 439 440 def test_first_and_second_failure_limit(self): 441 commit_queue = MockSimpleTestPlanCommitQueue( 442 first_test_failures=_lots_of_failing_tests, 443 second_test_failures=_lots_of_failing_tests, 444 clean_test_failures=[]) 445 446 self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.FAIL, expect_clean_tests_to_run=True, expected_failure_status_id=1) 447 448 def test_first_and_clean_failure_limit(self): 449 commit_queue = MockSimpleTestPlanCommitQueue( 450 first_test_failures=_lots_of_failing_tests, 451 second_test_failures=[], 452 clean_test_failures=_lots_of_failing_tests) 453 454 self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True) 455 456 def test_first_second_and_clean_failure_limit(self): 457 commit_queue = MockSimpleTestPlanCommitQueue( 458 first_test_failures=_lots_of_failing_tests, 459 second_test_failures=_lots_of_failing_tests, 460 
clean_test_failures=_lots_of_failing_tests) 407 461 408 462 self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True) -
trunk/Tools/Scripts/webkitpy/tool/bot/patchanalysistask.py
r175735 r176211 183 183 return first_failing_tests != second_failing_tests 184 184 185 def _continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try(self, results, results_archive, script_error):186 self._build_and_test_without_patch()187 188 # If we've made it here, then many (500) tests are failing with the patch applied, but189 # if the clean tree is also failing many tests, even if it's not quite as many (495),190 # then we can't be certain that the discrepancy isn't due to flakiness, and hence we must191 # defer judgement.192 if (len(results.failing_tests()) - len(self._delegate.test_results().failing_tests())) <= 5:193 return False194 195 return self.report_failure(results_archive, results, script_error)196 197 185 def _should_defer_patch_or_throw(self, failures_with_patch, results_archive_for_failures_with_patch, script_error, failure_id): 198 186 self._build_and_test_without_patch() … … 224 212 first_failure_status_id = self.failure_status_id 225 213 226 if first_results.did_exceed_test_failure_limit(): 227 return self._continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try(first_results, first_results_archive, first_script_error) 228 229 if self._test(): 214 if self._test() and not first_results.did_exceed_test_failure_limit(): 230 215 # Only report flaky tests if we were successful at parsing results.json and archiving results. 
231 216 if first_results and first_results_archive: … … 238 223 second_failure_status_id = self.failure_status_id 239 224 225 if second_results.did_exceed_test_failure_limit() and first_results.did_exceed_test_failure_limit(): 226 self._build_and_test_without_patch() 227 clean_tree_results = self._delegate.test_results() 228 229 if (len(first_results.failing_tests()) - len(clean_tree_results.failing_tests())) <= 5: 230 return False 231 232 self.failure_status_id = first_failure_status_id 233 234 return self.report_failure(first_results_archive, first_results, first_script_error) 235 240 236 if second_results.did_exceed_test_failure_limit(): 241 return self._continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try(second_results, second_results_archive, second_script_error) 237 self._should_defer_patch_or_throw(first_results.failing_test_results(), first_results_archive, first_script_error, first_failure_status_id) 238 return False 239 240 if first_results.did_exceed_test_failure_limit(): 241 self._should_defer_patch_or_throw(second_results.failing_test_results(), second_results_archive, second_script_error, second_failure_status_id) 242 return False 242 243 243 244 if self._results_failed_different_tests(first_results, second_results):
Note: See TracChangeset for help on using the changeset viewer.