Changeset 58210 in webkit
- Timestamp:
- Apr 24, 2010 12:03:24 AM (14 years ago)
- Location:
- trunk/WebKitTools
- Files:
-
- 9 edited
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
trunk/WebKitTools/ChangeLog
r58206 r58210 1 2010-04-23 Eric Seidel <eric@webkit.org> 2 3 Reviewed by Adam Barth. 4 5 check-webkit-style complains about non-utf8 data in layout test result 6 https://bugs.webkit.org/show_bug.cgi?id=38027 7 8 The problem was we were assuming patch files/diff output as utf-8. 9 Turns out they're not. We have to treat them as binary data because 10 a single patch may have multiple text files in it with conflicting encodings! 11 12 * Scripts/webkitpy/common/checkout/api.py: 13 - contents_at_revision returns a byte array, so decode it to unicode 14 before passing it to parse_latest_entry_from_file 15 * Scripts/webkitpy/common/checkout/api_unittest.py: 16 - Update our mock mock_contents_at_revision to match the encoding 17 semantics of the real one. 18 * Scripts/webkitpy/common/checkout/scm.py: 19 - Be careful not to decode output which may contain file contents 20 (like diff, cat or show) as the encoding for that content is unknown. 21 * Scripts/webkitpy/common/checkout/scm_unittest.py: 22 - Update our tests to use both latin1 and utf-8 encoded data. 23 * Scripts/webkitpy/common/net/bugzilla.py: 24 - _fill_attachment_form should not assume unicode data. Callers 25 may wish to attach other types of files to bugs. 26 * Scripts/webkitpy/common/prettypatch.py: 27 - Diffs are byte arrays, deal with them as such. 28 * Scripts/webkitpy/common/prettypatch_unittest.py: 29 - Test to make sure we handle diffs with multiple conflicting encodings. 30 * Scripts/webkitpy/common/system/executive_unittest.py: 31 - Make sure that our unicode support does not break our 32 byte array input support for run_command. 33 1 34 2010-04-23 Sam Weinig <sam@webkit.org> 2 35 -
trunk/WebKitTools/Scripts/webkitpy/common/checkout/api.py
r58066 r58210 51 51 def _latest_entry_for_changelog_at_revision(self, changelog_path, revision): 52 52 changelog_contents = self._scm.contents_at_revision(changelog_path, revision) 53 return ChangeLog.parse_latest_entry_from_file(StringIO.StringIO(changelog_contents)) 53 # contents_at_revision returns a byte array (str()), but we know 54 # that ChangeLog files are utf-8. parse_latest_entry_from_file 55 # expects a file-like object which vends unicode(), so we decode here. 56 changelog_file = StringIO.StringIO(changelog_contents.decode("utf-8")) 57 return ChangeLog.parse_latest_entry_from_file(changelog_file) 54 58 55 59 def changelog_entries_for_revision(self, revision): -
trunk/WebKitTools/Scripts/webkitpy/common/checkout/api_unittest.py
r58036 r58210 128 128 self.assertEqual(changelog_path, "foo") 129 129 self.assertEqual(revision, "bar") 130 return _changelog1 130 # contents_at_revision is expected to return a byte array (str) 131 # so we encode our unicode ChangeLog down to a utf-8 stream. 132 return _changelog1.encode("utf-8") 131 133 scm.contents_at_revision = mock_contents_at_revision 132 134 checkout = Checkout(scm) -
trunk/WebKitTools/Scripts/webkitpy/common/checkout/scm.py
r58036 r58210 309 309 310 310 def working_directory_is_clean(self): 311 return run_command(["svn", "diff"], cwd=self.checkout_root ) == ""311 return run_command(["svn", "diff"], cwd=self.checkout_root, decode_output=False) == "" 312 312 313 313 def clean_working_directory(self): … … 345 345 def changed_files_for_revision(self, revision): 346 346 # As far as I can tell svn diff --summarize output looks just like svn status output. 347 # No file contents printed, thus utf-8 auto-decoding in run_command is fine. 347 348 status_command = ["svn", "diff", "--summarize", "-c", revision] 348 349 return self.run_status_and_extract_filenames(status_command, self._status_regexp("ACDMR")) … … 362 363 363 364 def create_patch(self): 364 return run_command([self.script_path("svn-create-patch")], cwd=self.checkout_root, return_stderr=False) 365 """Returns a byte array (str()) representing the patch file. 366 Patch files are effectively binary since they may contain 367 files of multiple different encodings.""" 368 return run_command([self.script_path("svn-create-patch")], 369 cwd=self.checkout_root, return_stderr=False, 370 decode_output=False) 365 371 366 372 def committer_email_for_revision(self, revision): … … 368 374 369 375 def contents_at_revision(self, path, revision): 376 """Returns a byte array (str()) containing the contents 377 of path @ revision in the repository.""" 370 378 remote_path = "%s/%s" % (self._repository_url(), path) 371 return run_command(["svn", "cat", "-r", revision, remote_path] )379 return run_command(["svn", "cat", "-r", revision, remote_path], decode_output=False) 372 380 373 381 def diff_for_revision(self, revision): … … 467 475 def status_command(self): 468 476 # git status returns non-zero when there are changes, so we use git diff name --name-status HEAD instead. 477 # No file contents printed, thus utf-8 autodecoding in run_command is fine. 469 478 return ["git", "diff", "--name-status", "HEAD"] 470 479 … … 491 500 492 501 def conflicted_files(self): 502 # We do not need to pass decode_output for this diff command 503 # as we're passing --name-status which does not output any data. 493 504 status_command = ['git', 'diff', '--name-status', '-C', '-M', '--diff-filter=U'] 494 505 return self.run_status_and_extract_filenames(status_command, self._status_regexp("U")) … … 505 516 506 517 def create_patch(self): 518 """Returns a byte array (str()) representing the patch file. 519 Patch files are effectively binary since they may contain 520 files of multiple different encodings.""" 507 521 # FIXME: This should probably use cwd=self.checkout_root 508 return run_command(['git', 'diff', '--binary', 'HEAD'] )522 return run_command(['git', 'diff', '--binary', 'HEAD'], decode_output=False) 509 523 510 524 @classmethod … … 518 532 519 533 def contents_at_revision(self, path, revision): 520 return run_command(["git", "show", "%s:%s" % (self.git_commit_from_svn_revision(revision), path)]) 534 """Returns a byte array (str()) containing the contents 535 of path @ revision in the repository.""" 536 return run_command(["git", "show", "%s:%s" % (self.git_commit_from_svn_revision(revision), path)], decode_output=False) 521 537 522 538 def diff_for_revision(self, revision): … … 564 580 565 581 def create_patch_from_local_commit(self, commit_id): 566 return run_command(['git', 'diff', '--binary', commit_id + "^.." + commit_id]) 582 """Returns a byte array (str()) representing the patch file. 583 Patch files are effectively binary since they may contain 584 files of multiple different encodings.""" 585 return run_command(['git', 'diff', '--binary', commit_id + "^.." + commit_id], decode_output=False) 567 586 568 587 def create_patch_since_local_commit(self, commit_id): 569 return run_command(['git', 'diff', '--binary', commit_id]) 588 """Returns a byte array (str()) representing the patch file. 589 Patch files are effectively binary since they may contain 590 files of multiple different encodings.""" 591 return run_command(['git', 'diff', '--binary', commit_id], decode_output=False) 570 592 571 593 def commit_locally_with_message(self, message): -
trunk/WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py
r58036 r58210 53 53 54 54 # FIXME: This should be unified into one of the executive.py commands! 55 # Callers could use run_and_throw_if_fail(args, cwd=cwd, quiet=True) 55 56 def run_silent(args, cwd=None): 56 57 process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd) … … 69 70 with codecs.open(file_path, "r", encoding) as file: 70 71 return file.read() 72 73 74 def _make_diff(command, *args): 75 # We use this wrapper to disable output decoding. diffs should be treated as 76 # binary files since they may include text files of multiple differnet encodings. 77 return run_command([command, "diff"] + list(args), decode_output=False) 78 79 80 def _svn_diff(*args): 81 return _make_diff("svn", *args) 82 83 84 def _git_diff(*args): 85 return _make_diff("git", *args) 71 86 72 87 … … 107 122 cls._svn_commit("third commit") 108 123 109 write_into_file_at_path("test_file", "test1test2test3\ntest4\n") 124 # This 4th commit is used to make sure that our patch file handling 125 # code correctly treats patches as binary and does not attempt to 126 # decode them assuming they're utf-8. 127 write_into_file_at_path("test_file", u"latin1 test: \u00A0\n", "latin1") 128 write_into_file_at_path("test_file2", u"utf-8 test: \u00A0\n", "utf-8") 110 129 cls._svn_commit("fourth commit") 111 130 … … 185 204 class SCMTest(unittest.TestCase): 186 205 def _create_patch(self, patch_contents): 187 patch_path = os.path.join(self.svn_checkout_path, 'patch.diff') 188 write_into_file_at_path(patch_path, patch_contents) 189 patch = {} 190 patch['bug_id'] = '12345' 191 patch['url'] = 'file://%s' % urllib.pathname2url(patch_path) 192 193 attachment = Attachment(patch, None) # FIXME: This is a hack, scm.py shouldn't be fetching attachment data. 206 # FIXME: This code is brittle if the Attachment API changes. 207 attachment = Attachment({"bug_id": 12345}, None) 208 attachment.contents = lambda: patch_contents 209 194 210 joe_cool = Committer(name="Joe Cool", email_or_emails=None) 195 attachment. _reviewer =joe_cool211 attachment.reviewer = lambda: joe_cool 196 212 197 213 return attachment … … 257 273 self.assertEqual(changed_files, ["test_dir/test_file3", "test_file"]) 258 274 self.assertEqual(sorted(self.scm.changed_files_for_revision(3)), sorted(["test_file", "test_file2"])) # Git and SVN return different orders. 259 self.assertEqual(self.scm.changed_files_for_revision( 4), ["test_file"])275 self.assertEqual(self.scm.changed_files_for_revision(1), ["test_file"]) 260 276 261 277 def _shared_test_contents_at_revision(self): 262 278 self.assertEqual(self.scm.contents_at_revision("test_file", 2), "test1test2") 263 279 self.assertEqual(self.scm.contents_at_revision("test_file", 3), "test1test2test3\n") 264 self.assertEqual(self.scm.contents_at_revision("test_file", 4), "test1test2test3\ntest4\n") 280 281 # Verify that contents_at_revision returns a byte array, aka str(): 282 self.assertEqual(self.scm.contents_at_revision("test_file", 4), u"latin1 test: \u00A0\n".encode("latin1")) 283 self.assertEqual(self.scm.contents_at_revision("test_file2", 4), u"utf-8 test: \u00A0\n".encode("utf-8")) 265 284 266 285 self.assertEqual(self.scm.contents_at_revision("test_file2", 3), "second file") … … 527 546 def test_apply_svn_patch(self): 528 547 scm = detect_scm_system(self.svn_checkout_path) 529 patch = self._create_patch( run_command(['svn', 'diff', '-r4:3']))548 patch = self._create_patch(_svn_diff("-r4:3")) 530 549 self._setup_webkittools_scripts_symlink(scm) 531 550 Checkout(scm).apply_patch(patch) … … 533 552 def test_apply_svn_patch_force(self): 534 553 scm = detect_scm_system(self.svn_checkout_path) 535 patch = self._create_patch( run_command(['svn', 'diff', '-r2:4']))554 patch = self._create_patch(_svn_diff("-r2:4")) 536 555 self._setup_webkittools_scripts_symlink(scm) 537 556 self.assertRaises(ScriptError, Checkout(scm).apply_patch, patch, force=True) … … 661 680 write_into_file_at_path(test_file, 'foo') 662 681 663 diff_to_common_base = run_command(['git', 'diff', self.scm.svn_branch_name() + '..'])664 diff_to_merge_base = run_command(['git', 'diff', self.scm.svn_merge_base()])682 diff_to_common_base = _git_diff(self.scm.svn_branch_name() + '..') 683 diff_to_merge_base = _git_diff(self.scm.svn_merge_base()) 665 684 666 685 self.assertFalse(re.search(r'foo', diff_to_common_base)) … … 715 734 # as currently svn-apply will error out when trying to remove directories 716 735 # in Git: https://bugs.webkit.org/show_bug.cgi?id=34871 717 patch = self._create_patch( run_command(['git', 'diff', 'HEAD..HEAD^']))736 patch = self._create_patch(_git_diff('HEAD..HEAD^')) 718 737 self._setup_webkittools_scripts_symlink(scm) 719 738 Checkout(scm).apply_patch(patch) … … 721 740 def test_apply_git_patch_force(self): 722 741 scm = detect_scm_system(self.git_checkout_path) 723 patch = self._create_patch( run_command(['git', 'diff', 'HEAD~2..HEAD']))742 patch = self._create_patch(_git_diff('HEAD~2..HEAD')) 724 743 self._setup_webkittools_scripts_symlink(scm) 725 744 self.assertRaises(ScriptError, Checkout(scm).apply_patch, patch, force=True) -
trunk/WebKitTools/Scripts/webkitpy/common/net/bugzilla.py
r58066 r58210 569 569 def _fill_attachment_form(self, 570 570 description, 571 diff,571 patch_file_object, 572 572 comment_text=None, 573 573 mark_for_review=False, … … 590 590 patch_name ="%s.patch" % timestamp() 591 591 592 # ClientForm expects a file-like object593 patch_file_object = StringIO.StringIO(diff.encode("utf-8"))594 592 self.browser.add_file(patch_file_object, 595 593 "text/plain", … … 619 617 self.browser.select_form(name="entryform") 620 618 619 # _fill_attachment_form expects a file-like object 620 # Patch files are already binary, so no encoding needed. 621 assert(isinstance(diff, str)) 622 patch_file_object = StringIO.StringIO(diff) 621 623 self._fill_attachment_form(description, 622 diff,624 patch_file_object, 623 625 mark_for_review=mark_for_review, 624 626 mark_for_commit_queue=mark_for_commit_queue, … … 683 685 684 686 if diff: 687 # _fill_attachment_form expects a file-like object 688 # Patch files are already binary, so no encoding needed. 689 assert(isinstance(diff, str)) 690 patch_file_object = StringIO.StringIO(diff) 685 691 self._fill_attachment_form( 686 692 patch_description, 687 diff,693 patch_file_object, 688 694 mark_for_review=mark_for_review, 689 695 mark_for_commit_queue=mark_for_commit_queue) -
trunk/WebKitTools/Scripts/webkitpy/common/prettypatch.py
r58036 r58210 37 37 38 38 def pretty_diff_file(self, diff): 39 # Diffs can contain multiple text files of different encodings 40 # so we always deal with them as byte arrays, not unicode strings. 41 assert(isinstance(diff, str)) 39 42 pretty_diff = self.pretty_diff(diff) 40 43 diff_file = tempfile.NamedTemporaryFile(suffix=".html") 41 diff_file.write(pretty_diff .encode("utf-8"))44 diff_file.write(pretty_diff) 42 45 diff_file.flush() 43 46 return diff_file … … 53 56 prettify_path, 54 57 ] 55 return self._executive.run_command(args, input=diff) 58 # PrettyPatch does not modify the encoding of the diff output 59 # so we can't expect it to be utf-8. 60 return self._executive.run_command(args, input=diff, decode_output=False) -
trunk/WebKitTools/Scripts/webkitpy/common/prettypatch_unittest.py
r58209 r58210 1 1 # Copyright (C) 2010 Google Inc. All rights reserved. 2 # Copyright (C) 2009 Daniel Bates (dbates@intudata.com). All rights reserved.3 2 # 4 3 # Redistribution and use in source and binary forms, with or without … … 28 27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 28 29 import os.path 30 30 import unittest 31 31 32 from webkitpy.common.system.executive import Executive, run_command 32 from webkitpy.common.system.executive import Executive 33 from webkitpy.common.prettypatch import PrettyPatch 33 34 34 35 35 class ExecutiveTest(unittest.TestCase):36 class PrettyPatchTest(unittest.TestCase): 36 37 37 def test_run_command_with_bad_command(self): 38 def run_bad_command(): 39 run_command(["foo_bar_command_blah"], error_handler=Executive.ignore_error, return_exit_code=True) 40 self.failUnlessRaises(OSError, run_bad_command) 38 _diff_with_multiple_encodings = """ 39 Index: utf8_test 40 =================================================================== 41 --- utf8_test\t(revision 0) 42 +++ utf8_test\t(revision 0) 43 @@ -0,0 +1 @@ 44 +utf-8 test: \xc2\xa0 45 Index: latin1_test 46 =================================================================== 47 --- latin1_test\t(revision 0) 48 +++ latin1_test\t(revision 0) 49 @@ -0,0 +1 @@ 50 +latin1 test: \xa0 51 """ 41 52 42 def test_run_command_with_unicode(self): 43 """Validate that it is safe to pass unicode() objects 44 to Executive.run* methods, and they will return unicode() 45 objects by default unless decode_output=False""" 46 executive = Executive() 47 unicode_tor = u"WebKit \u2661 Tor Arne Vestb\u00F8!" 48 utf8_tor = unicode_tor.encode("utf-8") 53 def test_pretty_diff_encodings(self): 54 # This is slightly lame that PrettyPatch requires 55 # checkout_root to be passed to it. 56 webkitpy_common = os.path.dirname(__file__) 57 webkitpy = os.path.dirname(webkitpy_common) 58 scripts = os.path.dirname(webkitpy) 59 webkit_tools = os.path.dirname(scripts) 60 webkit_root = os.path.dirname(webkit_tools) 49 61 50 output = executive.run_command(["cat"], input=unicode_tor) 51 self.assertEquals(output, unicode_tor) 52 53 output = executive.run_command(["echo", "-n", unicode_tor]) 54 self.assertEquals(output, unicode_tor) 55 56 output = executive.run_command(["echo", "-n", unicode_tor], decode_output=False) 57 self.assertEquals(output, utf8_tor) 58 59 # FIXME: We should only have one run* method to test 60 output = executive.run_and_throw_if_fail(["echo", "-n", unicode_tor], quiet=True) 61 self.assertEquals(output, unicode_tor) 62 63 output = executive.run_and_throw_if_fail(["echo", "-n", unicode_tor], quiet=True, decode_output=False) 64 self.assertEquals(output, utf8_tor) 62 pretty_patch = PrettyPatch(Executive(), webkit_root) 63 pretty = pretty_patch.pretty_diff(self._diff_with_multiple_encodings) 64 self.assertTrue(pretty) # We got some output 65 self.assertTrue(isinstance(pretty, str)) # It's a byte array, not unicode -
trunk/WebKitTools/Scripts/webkitpy/common/system/executive_unittest.py
r58036 r58210 57 57 self.assertEquals(output, utf8_tor) 58 58 59 # Make sure that str() input also works. 60 output = executive.run_command(["cat"], input=utf8_tor, decode_output=False) 61 self.assertEquals(output, utf8_tor) 62 59 63 # FIXME: We should only have one run* method to test 60 64 output = executive.run_and_throw_if_fail(["echo", "-n", unicode_tor], quiet=True) -
trunk/WebKitTools/Scripts/webkitpy/tool/mocktool.py
r58036 r58210 508 508 error_handler=None, 509 509 return_exit_code=False, 510 return_stderr=True): 510 return_stderr=True, 511 decode_output=False): 511 512 return "MOCK output of child process" 512 513
Note: See TracChangeset
for help on using the changeset viewer.