Changeset 62168 in webkit
- Timestamp:
- Jun 30, 2010 1:28:31 AM (14 years ago)
- Location:
- trunk
- Files:
-
- 10 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/ChangeLog
r62166 r62168 1 2010-06-29 Eric Seidel <eric@webkit.org> 2 3 Reviewed by Adam Barth. 4 5 HTMLTokenizer needs EndOfFile support 6 https://bugs.webkit.org/show_bug.cgi?id=41344 7 8 We now pass an additional 60 subtests for the 9 html5lib test suite. Boo-ya. 10 11 * html5lib/runner-expected.txt: 12 1 13 2010-06-28 Eric Seidel <eric@webkit.org> 2 14 -
trunk/LayoutTests/html5lib/runner-expected-html5.txt
r62166 r62168 19 19 33 20 20 34 21 3522 3723 21 41 24 22 50 … … 75 73 8 76 74 9 77 10 78 11 79 13 80 17 81 18 82 19 83 20 84 21 85 22 75 11 76 13 86 77 24 87 78 25 … … 89 80 27 90 81 28 91 2992 82 32 93 3394 83 34 95 84 35 … … 97 86 37 98 87 38 99 40100 88 42 101 89 46 … … 106 94 53 107 95 54 108 57109 96 58 110 97 … … 137 124 resources/tests6.dat: 138 125 1 139 6140 126 8 141 127 9 … … 290 276 291 277 resources/tests16.dat: 292 3293 4294 5295 6296 7297 8298 9299 10300 12301 13302 14303 15304 16305 17306 25307 27308 28309 42310 44311 45312 56313 57314 58315 97316 98317 99318 100319 101320 102321 103322 104323 106324 107325 108326 109327 110328 111329 119330 121331 122332 136333 138334 139335 148336 149337 150338 278 187 339 279 188 -
trunk/LayoutTests/html5lib/runner-expected.txt
r61956 r62168 10 10 33 11 11 34 12 3513 3714 12 41 15 13 50 … … 42 40 8 43 41 9 44 10 45 11 46 17 47 18 48 19 49 20 50 21 51 22 52 29 42 11 53 43 32 54 3355 44 36 56 4057 45 42 58 46 44 … … 60 48 53 61 49 54 62 5763 50 64 51 resources/tests3.dat: … … 84 71 resources/tests6.dat: 85 72 1 86 687 73 8 88 74 9 … … 211 197 212 198 resources/tests16.dat: 213 3214 4215 5216 6217 7218 8219 9220 10221 12222 13223 14224 15225 16226 17227 25228 27229 28230 42231 44232 45233 56234 57235 58236 199 84 237 200 85 … … 240 203 88 241 204 94 242 97243 98244 99245 100246 101247 102248 103249 104250 106251 107252 108253 109254 110255 111256 119257 121258 122259 136260 138261 139262 148263 149264 150265 205 176 266 206 177 -
trunk/WebCore/ChangeLog
r62166 r62168 1 2010-06-29 Eric Seidel <eric@webkit.org> 2 3 Reviewed by Adam Barth. 4 5 HTMLTokenizer needs EndOfFile support 6 https://bugs.webkit.org/show_bug.cgi?id=41344 7 8 We're using \0 as the EndOfFile marker as HTML5 replaces 9 all other \0 with U+FFFD. Added some special case logic 10 to InputStreamPreprocessor::peek not to replace \0 when 11 it's being used at the end of a stream. 12 13 This fixed 60 subtests in html5lib/runner.html. 14 15 There are still at least two states (BogusCommentState and 16 CDATASectionState) which do not have proper EOF support. 17 18 * html/HTMLDocumentParser.cpp: 19 (WebCore::HTMLDocumentParser::finish): 20 (WebCore::HTMLDocumentParser::finishWasCalled): 21 * html/HTMLInputStream.h: 22 (WebCore::HTMLInputStream::markEndOfFile): 23 (WebCore::HTMLInputStream::haveSeenEndOfFile): 24 * html/HTMLToken.h: 25 (WebCore::HTMLToken::makeEndOfFile): 26 * html/HTMLTokenizer.cpp: 27 (WebCore::HTMLTokenizer::nextToken): 28 (WebCore::HTMLTokenizer::emitEndOfFile): 29 * html/HTMLTokenizer.h: 30 (WebCore::HTMLTokenizer::InputStreamPreprocessor::peek): 31 (WebCore::HTMLTokenizer::InputStreamPreprocessor::shouldTreatNullAsEndOfFileMarker): 32 * html/HTMLTreeBuilder.cpp: 33 (WebCore::HTMLTreeBuilder::passTokenToLegacyParser): 34 1 35 2010-06-28 Eric Seidel <eric@webkit.org> 2 36 -
trunk/WebCore/html/HTMLDocumentParser.cpp
r62033 r62168 290 290 void HTMLDocumentParser::finish() 291 291 { 292 // We're not going to get any more data off the network, so we closethe293 // input stream to indicate EOF.294 m_input. close();292 // We're not going to get any more data off the network, so we tell the 293 // input stream we've reached the end of file. 294 m_input.markEndOfFile(); 295 295 attemptToEnd(); 296 296 } … … 298 298 bool HTMLDocumentParser::finishWasCalled() 299 299 { 300 return m_input. isClosed();300 return m_input.haveSeenEndOfFile(); 301 301 } 302 302 -
trunk/WebCore/html/HTMLInputStream.h
r61608 r62168 65 65 } 66 66 67 void close() { m_last->close(); } 68 bool isClosed() { return m_last->isClosed(); } 67 void markEndOfFile() 68 { 69 // FIXME: This should use InputStreamPreprocessor::endOfFileMarker 70 // once InputStreamPreprocessor is split off into its own header. 71 static const UChar endOfFileMarker = 0; 72 m_last->append(SegmentedString(String(&endOfFileMarker, 1))); 73 m_last->close(); 74 } 75 76 bool haveSeenEndOfFile() 77 { 78 return m_last->isClosed(); 79 } 69 80 70 81 SegmentedString& current() { return m_first; } -
trunk/WebCore/html/HTMLToken.h
r61971 r62168 61 61 } 62 62 63 void makeEndOfFile() 64 { 65 ASSERT(m_type == Uninitialized); 66 m_type = EndOfFile; 67 } 68 63 69 void beginStartTag(UChar character) 64 70 { -
trunk/WebCore/html/HTMLTokenizer.cpp
r61673 r62168 45 45 using namespace HTMLNames; 46 46 47 const UChar HTMLTokenizer::InputStreamPreprocessor::endOfFileMarker = 0; 48 47 49 namespace { 48 50 … … 170 172 source.advance(m_lineNumber); \ 171 173 emitCurrentToken(); \ 174 return true; \ 175 } while (false) 176 177 // Identical to EMIT_AND_RESUME_IN, except does not advance. 178 #define EMIT_AND_RECONSUME_IN(stateName) \ 179 do { \ 180 m_state = stateName; \ 181 emitCurrentToken(); \ 182 return true; \ 183 } while (false) 184 185 // Used to emit the EndOfFile token. 186 // Check if we have buffered characters to emit first before emitting the EOF. 187 #define EMIT_END_OF_FILE() \ 188 do { \ 189 if (shouldEmitBufferedCharacterToken(source)) \ 190 return true; \ 191 m_state = DataState; \ 192 source.advance(m_lineNumber); \ 193 emitEndOfFile(); \ 172 194 return true; \ 173 195 } while (false) … … 253 275 } 254 276 ADVANCE_TO(TagOpenState); 255 } else { 277 } else if (cc == InputStreamPreprocessor::endOfFileMarker) 278 EMIT_END_OF_FILE(); 279 else { 256 280 emitCharacter(cc); 257 281 ADVANCE_TO(DataState); … … 272 296 else if (cc == '<') 273 297 ADVANCE_TO(RCDATALessThanSignState); 298 else if (cc == InputStreamPreprocessor::endOfFileMarker) 299 EMIT_END_OF_FILE(); 274 300 else { 275 301 emitCharacter(cc); … … 289 315 if (cc == '<') 290 316 ADVANCE_TO(RAWTEXTLessThanSignState); 317 else if (cc == InputStreamPreprocessor::endOfFileMarker) 318 EMIT_END_OF_FILE(); 291 319 else { 292 320 emitCharacter(cc); … … 299 327 if (cc == '<') 300 328 ADVANCE_TO(ScriptDataLessThanSignState); 329 else if (cc == InputStreamPreprocessor::endOfFileMarker) 330 EMIT_END_OF_FILE(); 301 331 else { 302 332 emitCharacter(cc); … … 307 337 308 338 BEGIN_STATE(PLAINTEXTState) { 309 emitCharacter(cc); 339 if (cc == InputStreamPreprocessor::endOfFileMarker) 340 EMIT_END_OF_FILE(); 341 else 342 emitCharacter(cc); 310 343 ADVANCE_TO(PLAINTEXTState); 311 344 } … … 347 380 emitParseError(); 348 381 ADVANCE_TO(DataState); 382 } else if (cc 
== InputStreamPreprocessor::endOfFileMarker) { 383 emitParseError(); 384 emitCharacter('<'); 385 emitCharacter('/'); 386 RECONSUME_IN(DataState); 349 387 } else { 350 388 emitParseError(); 351 389 RECONSUME_IN(BogusCommentState); 352 390 } 353 // FIXME: Handle EOF properly.354 391 } 355 392 END_STATE() … … 365 402 m_token->appendToName(toLowerCase(cc)); 366 403 ADVANCE_TO(TagNameState); 404 } if (cc == InputStreamPreprocessor::endOfFileMarker) { 405 emitParseError(); 406 RECONSUME_IN(DataState); 367 407 } else { 368 408 m_token->appendToName(cc); 369 409 ADVANCE_TO(TagNameState); 370 410 } 371 // FIXME: Handle EOF properly.372 411 } 373 412 END_STATE() … … 575 614 } else if (cc == '<') 576 615 ADVANCE_TO(ScriptDataEscapedLessThanSignState); 577 else { 616 else if (cc == InputStreamPreprocessor::endOfFileMarker) { 617 emitParseError(); 618 RECONSUME_IN(DataState); 619 } else { 578 620 emitCharacter(cc); 579 621 ADVANCE_TO(ScriptDataEscapedState); 580 622 } 581 // FIXME: Handle EOF properly.582 623 } 583 624 END_STATE() … … 589 630 } else if (cc == '<') 590 631 ADVANCE_TO(ScriptDataEscapedLessThanSignState); 591 else { 632 else if (cc == InputStreamPreprocessor::endOfFileMarker) { 633 emitParseError(); 634 RECONSUME_IN(DataState); 635 } else { 592 636 emitCharacter(cc); 593 637 ADVANCE_TO(ScriptDataEscapedState); 594 638 } 595 // FIXME: Handle EOF properly.596 639 } 597 640 END_STATE() … … 606 649 emitCharacter(cc); 607 650 ADVANCE_TO(ScriptDataState); 651 } if (cc == InputStreamPreprocessor::endOfFileMarker) { 652 emitParseError(); 653 RECONSUME_IN(DataState); 608 654 } else { 609 655 emitCharacter(cc); 610 656 ADVANCE_TO(ScriptDataEscapedState); 611 657 } 612 // FIXME: Handle EOF properly.613 658 } 614 659 END_STATE() … … 711 756 emitCharacter(cc); 712 757 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); 758 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 759 emitParseError(); 760 RECONSUME_IN(DataState); 713 761 } else { 714 762 
emitCharacter(cc); 715 763 ADVANCE_TO(ScriptDataDoubleEscapedState); 716 764 } 717 // FIXME: Handle EOF properly.718 765 } 719 766 END_STATE() … … 726 773 emitCharacter(cc); 727 774 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); 775 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 776 emitParseError(); 777 RECONSUME_IN(DataState); 728 778 } else { 729 779 emitCharacter(cc); 730 780 ADVANCE_TO(ScriptDataDoubleEscapedState); 731 781 } 732 // FIXME: Handle EOF properly.733 782 } 734 783 END_STATE() … … 744 793 emitCharacter(cc); 745 794 ADVANCE_TO(ScriptDataState); 795 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 796 emitParseError(); 797 RECONSUME_IN(DataState); 746 798 } else { 747 799 emitCharacter(cc); 748 800 ADVANCE_TO(ScriptDataDoubleEscapedState); 749 801 } 750 // FIXME: Handle EOF properly.751 802 } 752 803 END_STATE() … … 793 844 m_token->appendToAttributeName(toLowerCase(cc)); 794 845 ADVANCE_TO(AttributeNameState); 846 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 847 emitParseError(); 848 RECONSUME_IN(DataState); 795 849 } else { 796 850 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') … … 800 854 ADVANCE_TO(AttributeNameState); 801 855 } 802 // FIXME: Handle EOF properly.803 856 } 804 857 END_STATE() … … 816 869 m_token->appendToAttributeName(toLowerCase(cc)); 817 870 ADVANCE_TO(AttributeNameState); 871 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 872 emitParseError(); 873 RECONSUME_IN(DataState); 818 874 } else { 819 875 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') … … 822 878 ADVANCE_TO(AttributeNameState); 823 879 } 824 // FIXME: Handle EOF properly.825 880 } 826 881 END_STATE() … … 839 894 m_token->appendToAttributeName(toLowerCase(cc)); 840 895 ADVANCE_TO(AttributeNameState); 896 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 897 emitParseError(); 898 RECONSUME_IN(DataState); 841 899 } else { 842 900 if (cc == '"' || cc == '\'' || cc == '<') … … 846 904 
ADVANCE_TO(AttributeNameState); 847 905 } 848 // FIXME: Handle EOF properly.849 906 } 850 907 END_STATE() … … 862 919 emitParseError(); 863 920 EMIT_AND_RESUME_IN(DataState); 921 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 922 emitParseError(); 923 RECONSUME_IN(DataState); 864 924 } else { 865 925 if (cc == '<' || cc == '=' || cc == '`') … … 877 937 m_additionalAllowedCharacter = '"'; 878 938 ADVANCE_TO(CharacterReferenceInAttributeValueState); 939 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 940 emitParseError(); 941 RECONSUME_IN(DataState); 879 942 } else { 880 943 m_token->appendToAttributeValue(cc); 881 944 ADVANCE_TO(AttributeValueDoubleQuotedState); 882 945 } 883 // FIXME: Handle EOF properly.884 946 } 885 947 END_STATE() … … 891 953 m_additionalAllowedCharacter = '\''; 892 954 ADVANCE_TO(CharacterReferenceInAttributeValueState); 955 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 956 emitParseError(); 957 RECONSUME_IN(DataState); 893 958 } else { 894 959 m_token->appendToAttributeValue(cc); 895 960 ADVANCE_TO(AttributeValueSingleQuotedState); 896 961 } 897 // FIXME: Handle EOF properly.898 962 } 899 963 END_STATE() … … 907 971 } else if (cc == '>') 908 972 EMIT_AND_RESUME_IN(DataState); 909 else { 973 else if (cc == InputStreamPreprocessor::endOfFileMarker) { 974 emitParseError(); 975 RECONSUME_IN(DataState); 976 } else { 910 977 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`') 911 978 emitParseError(); … … 913 980 ADVANCE_TO(AttributeValueUnquotedState); 914 981 } 915 // FIXME: Handle EOF properly.916 982 } 917 983 END_STATE() … … 952 1018 else if (cc == '>') 953 1019 EMIT_AND_RESUME_IN(DataState); 954 else { 1020 else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1021 emitParseError(); 1022 RECONSUME_IN(DataState); 1023 } else { 955 1024 emitParseError(); 956 1025 RECONSUME_IN(BeforeAttributeNameState); 957 1026 } 958 // FIXME: Handle EOF properly.959 1027 } 960 1028 END_STATE() … … 
964 1032 notImplemented(); 965 1033 EMIT_AND_RESUME_IN(DataState); 1034 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1035 emitParseError(); 1036 RECONSUME_IN(DataState); 966 1037 } else { 967 1038 emitParseError(); 968 1039 RECONSUME_IN(BeforeAttributeNameState); 969 1040 } 970 // FIXME: Handle EOF properly.971 1041 } 972 1042 END_STATE() … … 1025 1095 emitParseError(); 1026 1096 EMIT_AND_RESUME_IN(DataState); 1097 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1098 emitParseError(); 1099 EMIT_AND_RECONSUME_IN(DataState); 1027 1100 } else { 1028 1101 m_token->appendToComment(cc); 1029 1102 ADVANCE_TO(CommentState); 1030 1103 } 1031 // FIXME: Handle EOF properly.1032 1104 } 1033 1105 END_STATE() … … 1039 1111 emitParseError(); 1040 1112 EMIT_AND_RESUME_IN(DataState); 1113 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1114 emitParseError(); 1115 EMIT_AND_RECONSUME_IN(DataState); 1041 1116 } else { 1042 1117 m_token->appendToComment('-'); … … 1044 1119 ADVANCE_TO(CommentState); 1045 1120 } 1046 // FIXME: Handle EOF properly.1047 1121 } 1048 1122 END_STATE() … … 1051 1125 if (cc == '-') 1052 1126 ADVANCE_TO(CommentEndDashState); 1053 else { 1127 else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1128 emitParseError(); 1129 EMIT_AND_RECONSUME_IN(DataState); 1130 } else { 1054 1131 m_token->appendToComment(cc); 1055 1132 ADVANCE_TO(CommentState); 1056 1133 } 1057 // FIXME: Handle EOF properly.1058 1134 } 1059 1135 END_STATE() … … 1062 1138 if (cc == '-') 1063 1139 ADVANCE_TO(CommentEndState); 1064 else { 1140 else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1141 emitParseError(); 1142 EMIT_AND_RECONSUME_IN(DataState); 1143 } else { 1065 1144 m_token->appendToComment('-'); 1066 1145 m_token->appendToComment(cc); 1067 1146 ADVANCE_TO(CommentState); 1068 1147 } 1069 // FIXME: Handle EOF properly.1070 1148 } 1071 1149 END_STATE() … … 1088 1166 m_token->appendToComment(cc); 1089 1167 ADVANCE_TO(CommentEndState); 
1168 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1169 emitParseError(); 1170 EMIT_AND_RECONSUME_IN(DataState); 1090 1171 } else { 1091 1172 emitParseError(); … … 1095 1176 ADVANCE_TO(CommentState); 1096 1177 } 1097 // FIXME: Handle EOF properly.1098 1178 } 1099 1179 END_STATE() … … 1107 1187 } else if (cc == '>') 1108 1188 EMIT_AND_RESUME_IN(DataState); 1109 else { 1189 else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1190 emitParseError(); 1191 EMIT_AND_RECONSUME_IN(DataState); 1192 } else { 1110 1193 m_token->appendToComment('-'); 1111 1194 m_token->appendToComment('-'); … … 1114 1197 ADVANCE_TO(CommentState); 1115 1198 } 1116 // FIXME: Handle EOF properly.1117 1199 } 1118 1200 END_STATE() … … 1126 1208 else if (cc == '>') 1127 1209 EMIT_AND_RESUME_IN(DataState); 1128 else { 1210 else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1211 emitParseError(); 1212 EMIT_AND_RECONSUME_IN(DataState); 1213 } else { 1129 1214 m_token->appendToComment(cc); 1130 1215 ADVANCE_TO(CommentState); 1131 1216 } 1132 // FIXME: Handle EOF properly.1133 1217 } 1134 1218 END_STATE() … … 1137 1221 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1138 1222 ADVANCE_TO(BeforeDOCTYPENameState); 1139 else { 1223 else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1224 emitParseError(); 1225 m_token->beginDOCTYPE(); 1226 m_token->setForceQuirks(); 1227 EMIT_AND_RECONSUME_IN(DataState); 1228 } else { 1140 1229 emitParseError(); 1141 1230 RECONSUME_IN(BeforeDOCTYPENameState); 1142 1231 } 1143 // FIXME: Handle EOF properly.1144 1232 } 1145 1233 END_STATE() … … 1156 1244 m_token->setForceQuirks(); 1157 1245 EMIT_AND_RESUME_IN(DataState); 1246 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1247 emitParseError(); 1248 m_token->beginDOCTYPE(); 1249 m_token->setForceQuirks(); 1250 EMIT_AND_RECONSUME_IN(DataState); 1158 1251 } else { 1159 1252 m_token->beginDOCTYPE(cc); 1160 1253 ADVANCE_TO(DOCTYPENameState); 1161 1254 } 1162 // 
FIXME: Handle EOF properly.1163 1255 } 1164 1256 END_STATE() … … 1172 1264 m_token->appendToName(toLowerCase(cc)); 1173 1265 ADVANCE_TO(DOCTYPENameState); 1266 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1267 emitParseError(); 1268 m_token->setForceQuirks(); 1269 EMIT_AND_RECONSUME_IN(DataState); 1174 1270 } else { 1175 1271 m_token->appendToName(cc); 1176 1272 ADVANCE_TO(DOCTYPENameState); 1177 1273 } 1178 // FIXME: Handle EOF properly.1179 1274 } 1180 1275 END_STATE() … … 1185 1280 if (cc == '>') 1186 1281 EMIT_AND_RESUME_IN(DataState); 1187 else { 1282 else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1283 emitParseError(); 1284 m_token->setForceQuirks(); 1285 EMIT_AND_RECONSUME_IN(DataState); 1286 } else { 1188 1287 DEFINE_STATIC_LOCAL(String, publicString, ("public")); 1189 1288 DEFINE_STATIC_LOCAL(String, systemString, ("system")); … … 1207 1306 ADVANCE_TO(BogusDOCTYPEState); 1208 1307 } 1209 // FIXME: Handle EOF properly.1210 1308 } 1211 1309 END_STATE() … … 1226 1324 m_token->setForceQuirks(); 1227 1325 EMIT_AND_RESUME_IN(DataState); 1326 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1327 emitParseError(); 1328 m_token->setForceQuirks(); 1329 EMIT_AND_RECONSUME_IN(DataState); 1228 1330 } else { 1229 1331 emitParseError(); … … 1231 1333 ADVANCE_TO(BogusDOCTYPEState); 1232 1334 } 1233 // FIXME: Handle EOF properly.1234 1335 } 1235 1336 END_STATE() … … 1248 1349 m_token->setForceQuirks(); 1249 1350 EMIT_AND_RESUME_IN(DataState); 1351 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1352 emitParseError(); 1353 m_token->setForceQuirks(); 1354 EMIT_AND_RECONSUME_IN(DataState); 1250 1355 } else { 1251 1356 emitParseError(); … … 1253 1358 ADVANCE_TO(BogusDOCTYPEState); 1254 1359 } 1255 // FIXME: Handle EOF properly.1256 1360 } 1257 1361 END_STATE() … … 1264 1368 m_token->setForceQuirks(); 1265 1369 EMIT_AND_RESUME_IN(DataState); 1370 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1371 
emitParseError(); 1372 m_token->setForceQuirks(); 1373 EMIT_AND_RECONSUME_IN(DataState); 1266 1374 } else { 1267 1375 m_token->appendToPublicIdentifier(cc); 1268 1376 ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); 1269 1377 } 1270 // FIXME: Handle EOF properly.1271 1378 } 1272 1379 END_STATE() … … 1279 1386 m_token->setForceQuirks(); 1280 1387 EMIT_AND_RESUME_IN(DataState); 1388 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1389 emitParseError(); 1390 m_token->setForceQuirks(); 1391 EMIT_AND_RECONSUME_IN(DataState); 1281 1392 } else { 1282 1393 m_token->appendToPublicIdentifier(cc); 1283 1394 ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); 1284 1395 } 1285 // FIXME: Handle EOF properly.1286 1396 } 1287 1397 END_STATE() … … 1300 1410 m_token->setSystemIdentifierToEmptyString(); 1301 1411 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1412 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1413 emitParseError(); 1414 m_token->setForceQuirks(); 1415 EMIT_AND_RECONSUME_IN(DataState); 1302 1416 } else { 1303 1417 emitParseError(); … … 1305 1419 ADVANCE_TO(BogusDOCTYPEState); 1306 1420 } 1307 // FIXME: Handle EOF properly.1308 1421 } 1309 1422 END_STATE() … … 1320 1433 m_token->setSystemIdentifierToEmptyString(); 1321 1434 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1435 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1436 emitParseError(); 1437 m_token->setForceQuirks(); 1438 EMIT_AND_RECONSUME_IN(DataState); 1322 1439 } else { 1323 1440 emitParseError(); … … 1325 1442 ADVANCE_TO(BogusDOCTYPEState); 1326 1443 } 1327 // FIXME: Handle EOF properly.1328 1444 } 1329 1445 END_STATE() … … 1344 1460 m_token->setForceQuirks(); 1345 1461 EMIT_AND_RESUME_IN(DataState); 1462 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1463 emitParseError(); 1464 m_token->setForceQuirks(); 1465 EMIT_AND_RECONSUME_IN(DataState); 1346 1466 } else { 1347 1467 emitParseError(); … … 1349 1469 
ADVANCE_TO(BogusDOCTYPEState); 1350 1470 } 1351 // FIXME: Handle EOF properly.1352 1471 } 1353 1472 END_STATE() … … 1366 1485 m_token->setForceQuirks(); 1367 1486 EMIT_AND_RESUME_IN(DataState); 1487 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1488 emitParseError(); 1489 m_token->setForceQuirks(); 1490 EMIT_AND_RECONSUME_IN(DataState); 1368 1491 } else { 1369 1492 emitParseError(); … … 1371 1494 ADVANCE_TO(BogusDOCTYPEState); 1372 1495 } 1373 // FIXME: Handle EOF properly.1374 1496 } 1375 1497 END_STATE() … … 1382 1504 m_token->setForceQuirks(); 1383 1505 EMIT_AND_RESUME_IN(DataState); 1506 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1507 emitParseError(); 1508 m_token->setForceQuirks(); 1509 EMIT_AND_RECONSUME_IN(DataState); 1384 1510 } else { 1385 1511 m_token->appendToSystemIdentifier(cc); 1386 1512 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); 1387 1513 } 1388 // FIXME: Handle EOF properly.1389 1514 } 1390 1515 END_STATE() … … 1397 1522 m_token->setForceQuirks(); 1398 1523 EMIT_AND_RESUME_IN(DataState); 1524 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1525 emitParseError(); 1526 m_token->setForceQuirks(); 1527 EMIT_AND_RECONSUME_IN(DataState); 1399 1528 } else { 1400 1529 m_token->appendToSystemIdentifier(cc); 1401 1530 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1402 1531 } 1403 // FIXME: Handle EOF properly.1404 1532 } 1405 1533 END_STATE() … … 1410 1538 else if (cc == '>') 1411 1539 EMIT_AND_RESUME_IN(DataState); 1412 else { 1540 else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1541 emitParseError(); 1542 m_token->setForceQuirks(); 1543 EMIT_AND_RECONSUME_IN(DataState); 1544 } else { 1413 1545 emitParseError(); 1414 1546 ADVANCE_TO(BogusDOCTYPEState); 1415 1547 } 1416 // FIXME: Handle EOF properly.1417 1548 } 1418 1549 END_STATE() … … 1421 1552 if (cc == '>') 1422 1553 EMIT_AND_RESUME_IN(DataState); 1554 else if (cc == InputStreamPreprocessor::endOfFileMarker) 1555 
EMIT_AND_RECONSUME_IN(DataState); 1423 1556 ADVANCE_TO(BogusDOCTYPEState); 1424 // FIXME: Handle EOF properly.1425 1557 } 1426 1558 END_STATE() … … 1429 1561 notImplemented(); 1430 1562 ADVANCE_TO(CDATASectionState); 1563 // FIXME: Handle EOF properly. 1431 1564 } 1432 1565 END_STATE() … … 1456 1589 inline void HTMLTokenizer::emitCharacter(UChar character) 1457 1590 { 1591 ASSERT(character != InputStreamPreprocessor::endOfFileMarker); 1458 1592 if (m_token->type() != HTMLToken::Character) { 1459 1593 m_token->beginCharacter(character); … … 1485 1619 } 1486 1620 1621 inline void HTMLTokenizer::emitEndOfFile() 1622 { 1623 // Discard any in-progress token before setting up an EOF token. 1624 m_token->clear(); 1625 m_token->makeEndOfFile(); 1626 } 1627 1487 1628 inline bool HTMLTokenizer::shouldEmitBufferedCharacterToken(const SegmentedString& source) 1488 1629 { -
trunk/WebCore/html/HTMLTokenizer.h
r61678 r62168 165 165 // by the replacement character. We suspect this is a problem with the spec as doing 166 166 // that filtering breaks surrogate pair handling and causes us not to match Minefield. 167 if (m_nextInputCharacter == '\0' )167 if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) 168 168 m_nextInputCharacter = 0xFFFD; 169 169 } … … 180 180 } 181 181 182 static const UChar endOfFileMarker; 183 182 184 private: 185 bool shouldTreatNullAsEndOfFileMarker(SegmentedString& source) const 186 { 187 return source.isClosed() && source.length() == 1; 188 } 189 183 190 // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character 184 191 UChar m_nextInputCharacter; … … 190 197 inline void emitCurrentToken(); 191 198 inline void emitCodePoint(unsigned); 199 inline void emitEndOfFile(); 192 200 193 201 inline bool processEntity(SegmentedString& source); -
trunk/WebCore/html/HTMLTreeBuilder.cpp
r62166 r62168 195 195 return; 196 196 } 197 198 if (token.type() == HTMLToken::EndOfFile) 199 return; 197 200 198 201 // For now, we translate into an old-style token for testing.
Note: See TracChangeset
for help on using the changeset viewer.