Changeset 60696 in webkit
- Timestamp:
- Jun 4, 2010 12:19:39 PM (14 years ago)
- Location:
- trunk/WebCore
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/WebCore/ChangeLog
r60694 r60696 1 2010-06-04 Adam Barth <abarth@webkit.org> 2 3 Reviewed by Eric Seidel. 4 5 Make HTML5Lexer go fast 6 https://bugs.webkit.org/show_bug.cgi?id=40048 7 8 The next step: using macros to delimit each state. Eventually, we're 9 going to change what these macros expand to. 10 11 * html/HTML5Lexer.cpp: 12 (WebCore::HTML5Lexer::nextToken): 13 1 14 2010-06-04 Adam Barth <abarth@webkit.org> 2 15 -
trunk/WebCore/html/HTML5Lexer.cpp
r60694 r60696 319 319 } 320 320 321 #define BEGIN_STATE(stateName) case stateName: 322 #define END_STATE() ASSERT_NOT_REACHED(); break; 323 321 324 // We'd like to use the standard do { } while (false) pattern here, but it 322 325 // doesn't play nicely with continue. … … 355 358 UChar cc = *source; 356 359 switch (m_state) { 357 case DataState:{360 BEGIN_STATE(DataState) { 358 361 if (cc == '&') 359 362 m_state = CharacterReferenceInDataState; … … 369 372 break; 370 373 } 371 case CharacterReferenceInDataState: { 374 END_STATE() 375 376 BEGIN_STATE(CharacterReferenceInDataState) { 372 377 if (!processEntity(source)) 373 378 return shouldEmitBufferedCharacterToken(source); 374 379 RECONSUME_IN(DataState); 375 380 } 376 case RCDATAState: { 381 END_STATE() 382 383 BEGIN_STATE(RCDATAState) { 377 384 if (cc == '&') 378 385 m_state = CharacterReferenceInRCDATAState; … … 383 390 break; 384 391 } 385 case CharacterReferenceInRCDATAState: { 392 END_STATE() 393 394 BEGIN_STATE(CharacterReferenceInRCDATAState) { 386 395 if (!processEntity(source)) 387 396 return shouldEmitBufferedCharacterToken(source); 388 397 RECONSUME_IN(RCDATAState); 389 398 } 390 case RAWTEXTState: { 399 END_STATE() 400 401 BEGIN_STATE(RAWTEXTState) { 391 402 if (cc == '<') 392 403 m_state = RAWTEXTLessThanSignState; … … 395 406 break; 396 407 } 397 case ScriptDataState: { 408 END_STATE() 409 410 BEGIN_STATE(ScriptDataState) { 398 411 if (cc == '<') 399 412 m_state = ScriptDataLessThanSignState; … … 402 415 break; 403 416 } 404 case PLAINTEXTState: { 417 END_STATE() 418 419 BEGIN_STATE(PLAINTEXTState) { 405 420 emitCharacter(cc); 406 421 break; 407 422 } 408 case TagOpenState: { 423 END_STATE() 424 425 BEGIN_STATE(TagOpenState) { 409 426 if (cc == '!') 410 427 m_state = MarkupDeclarationOpenState; … … 432 449 break; 433 450 } 434 case EndTagOpenState: { 451 END_STATE() 452 453 BEGIN_STATE(EndTagOpenState) { 435 454 if (cc >= 'A' && cc <= 'Z') { 436 455 m_token->beginEndTag(toLowerCase(cc)); … … 449 468 
break; 450 469 } 451 case TagNameState: { 470 END_STATE() 471 472 BEGIN_STATE(TagNameState) { 452 473 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 453 474 m_state = BeforeAttributeNameState; … … 464 485 break; 465 486 } 466 case RCDATALessThanSignState: { 487 END_STATE() 488 489 BEGIN_STATE(RCDATALessThanSignState) { 467 490 if (cc == '/') { 468 491 m_temporaryBuffer.clear(); … … 475 498 break; 476 499 } 477 case RCDATAEndTagOpenState: { 500 END_STATE() 501 502 BEGIN_STATE(RCDATAEndTagOpenState) { 478 503 if (cc >= 'A' && cc <= 'Z') { 479 504 m_temporaryBuffer.append(cc); … … 491 516 break; 492 517 } 493 case RCDATAEndTagNameState: { 518 END_STATE() 519 520 BEGIN_STATE(RCDATAEndTagNameState) { 494 521 if (cc >= 'A' && cc <= 'Z') { 495 522 m_temporaryBuffer.append(cc); … … 526 553 break; 527 554 } 528 case RAWTEXTLessThanSignState: { 555 END_STATE() 556 557 BEGIN_STATE(RAWTEXTLessThanSignState) { 529 558 if (cc == '/') { 530 559 m_temporaryBuffer.clear(); … … 537 566 break; 538 567 } 539 case RAWTEXTEndTagOpenState: { 568 END_STATE() 569 570 BEGIN_STATE(RAWTEXTEndTagOpenState) { 540 571 if (cc >= 'A' && cc <= 'Z') { 541 572 m_temporaryBuffer.append(cc); … … 553 584 break; 554 585 } 555 case RAWTEXTEndTagNameState: { 586 END_STATE() 587 588 BEGIN_STATE(RAWTEXTEndTagNameState) { 556 589 if (cc >= 'A' && cc <= 'Z') { 557 590 m_temporaryBuffer.append(cc); … … 588 621 break; 589 622 } 590 case ScriptDataLessThanSignState: { 623 END_STATE() 624 625 BEGIN_STATE(ScriptDataLessThanSignState) { 591 626 if (cc == '/') { 592 627 m_temporaryBuffer.clear(); … … 603 638 break; 604 639 } 605 case ScriptDataEndTagOpenState: { 640 END_STATE() 641 642 BEGIN_STATE(ScriptDataEndTagOpenState) { 606 643 if (cc >= 'A' && cc <= 'Z') { 607 644 m_temporaryBuffer.append(cc); … … 619 656 break; 620 657 } 621 case ScriptDataEndTagNameState: { 658 END_STATE() 659 660 BEGIN_STATE(ScriptDataEndTagNameState) { 622 661 if (cc >= 'A' && cc <= 'Z') { 623 662 
m_temporaryBuffer.append(cc); … … 654 693 break; 655 694 } 656 case ScriptDataEscapeStartState: { 695 END_STATE() 696 697 BEGIN_STATE(ScriptDataEscapeStartState) { 657 698 if (cc == '-') { 658 699 emitCharacter(cc); … … 663 704 break; 664 705 } 665 case ScriptDataEscapeStartDashState: { 706 END_STATE() 707 708 BEGIN_STATE(ScriptDataEscapeStartDashState) { 666 709 if (cc == '-') { 667 710 emitCharacter(cc); … … 672 715 break; 673 716 } 674 case ScriptDataEscapedState: { 717 END_STATE() 718 719 BEGIN_STATE(ScriptDataEscapedState) { 675 720 if (cc == '-') { 676 721 emitCharacter(cc); … … 683 728 break; 684 729 } 685 case ScriptDataEscapedDashState: { 730 END_STATE() 731 732 BEGIN_STATE(ScriptDataEscapedDashState) { 686 733 if (cc == '-') { 687 734 emitCharacter(cc); … … 696 743 break; 697 744 } 698 case ScriptDataEscapedDashDashState: { 745 END_STATE() 746 747 BEGIN_STATE(ScriptDataEscapedDashDashState) { 699 748 if (cc == '-') 700 749 emitCharacter(cc); … … 711 760 break; 712 761 } 713 case ScriptDataEscapedLessThanSignState: { 762 END_STATE() 763 764 BEGIN_STATE(ScriptDataEscapedLessThanSignState) { 714 765 if (cc == '/') { 715 766 m_temporaryBuffer.clear(); … … 734 785 break; 735 786 } 736 case ScriptDataEscapedEndTagOpenState: { 787 END_STATE() 788 789 BEGIN_STATE(ScriptDataEscapedEndTagOpenState) { 737 790 if (cc >= 'A' && cc <= 'Z') { 738 791 m_temporaryBuffer.append(cc); … … 750 803 break; 751 804 } 752 case ScriptDataEscapedEndTagNameState: { 805 END_STATE() 806 807 BEGIN_STATE(ScriptDataEscapedEndTagNameState) { 753 808 if (cc >= 'A' && cc <= 'Z') { 754 809 m_temporaryBuffer.append(cc); … … 785 840 break; 786 841 } 787 case ScriptDataDoubleEscapeStartState: { 842 END_STATE() 843 844 BEGIN_STATE(ScriptDataDoubleEscapeStartState) { 788 845 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '/' || cc == '>') { 789 846 emitCharacter(cc); … … 803 860 break; 804 861 } 805 case ScriptDataDoubleEscapedState: { 862 END_STATE() 863 864 
BEGIN_STATE(ScriptDataDoubleEscapedState) { 806 865 if (cc == '-') { 807 866 emitCharacter(cc); … … 815 874 break; 816 875 } 817 case ScriptDataDoubleEscapedDashState: { 876 END_STATE() 877 878 BEGIN_STATE(ScriptDataDoubleEscapedDashState) { 818 879 if (cc == '-') { 819 880 emitCharacter(cc); … … 829 890 break; 830 891 } 831 case ScriptDataDoubleEscapedDashDashState: { 892 END_STATE() 893 894 BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) { 832 895 if (cc == '-') 833 896 emitCharacter(cc); … … 845 908 break; 846 909 } 847 case ScriptDataDoubleEscapedLessThanSignState: { 910 END_STATE() 911 912 BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) { 848 913 if (cc == '/') { 849 914 emitCharacter(cc); … … 855 920 break; 856 921 } 857 case ScriptDataDoubleEscapeEndState: { 922 END_STATE() 923 924 BEGIN_STATE(ScriptDataDoubleEscapeEndState) { 858 925 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '/' || cc == '>') { 859 926 emitCharacter(cc); … … 873 940 break; 874 941 } 875 case BeforeAttributeNameState: { 942 END_STATE() 943 944 BEGIN_STATE(BeforeAttributeNameState) { 876 945 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 877 946 break; … … 895 964 break; 896 965 } 897 case AttributeNameState: { 966 END_STATE() 967 968 BEGIN_STATE(AttributeNameState) { 898 969 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 899 970 m_state = AfterAttributeNameState; … … 916 987 break; 917 988 } 918 case AfterAttributeNameState: { 989 END_STATE() 990 991 BEGIN_STATE(AfterAttributeNameState) { 919 992 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 920 993 break; … … 940 1013 break; 941 1014 } 942 case BeforeAttributeValueState: { 1015 END_STATE() 1016 1017 BEGIN_STATE(BeforeAttributeValueState) { 943 1018 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 944 1019 break; … … 961 1036 break; 962 1037 } 963 case AttributeValueDoubleQuotedState: { 1038 END_STATE() 1039 1040 
BEGIN_STATE(AttributeValueDoubleQuotedState) { 964 1041 if (cc == '"') 965 1042 m_state = AfterAttributeValueQuotedState; … … 972 1049 break; 973 1050 } 974 case AttributeValueSingleQuotedState: { 1051 END_STATE() 1052 1053 BEGIN_STATE(AttributeValueSingleQuotedState) { 975 1054 if (cc == '\'') 976 1055 m_state = AfterAttributeValueQuotedState; … … 983 1062 break; 984 1063 } 985 case AttributeValueUnquotedState: { 1064 END_STATE() 1065 1066 BEGIN_STATE(AttributeValueUnquotedState) { 986 1067 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 987 1068 m_state = BeforeAttributeNameState; … … 1000 1081 break; 1001 1082 } 1002 case CharacterReferenceInAttributeValueState: { 1083 END_STATE() 1084 1085 BEGIN_STATE(CharacterReferenceInAttributeValueState) { 1003 1086 bool notEnoughCharacters = false; 1004 1087 unsigned value = consumeEntity(source, notEnoughCharacters); … … 1027 1110 continue; 1028 1111 } 1029 case AfterAttributeValueQuotedState: { 1112 END_STATE() 1113 1114 BEGIN_STATE(AfterAttributeValueQuotedState) { 1030 1115 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1031 1116 m_state = BeforeAttributeNameState; … … 1042 1127 break; 1043 1128 } 1044 case SelfClosingStartTagState: { 1129 END_STATE() 1130 1131 BEGIN_STATE(SelfClosingStartTagState) { 1045 1132 if (cc == '>') { 1046 1133 notImplemented(); … … 1054 1141 break; 1055 1142 } 1056 case BogusCommentState: { 1143 END_STATE() 1144 1145 BEGIN_STATE(BogusCommentState) { 1057 1146 m_token->beginComment(); 1058 1147 while (!source.isEmpty()) { … … 1070 1159 break; 1071 1160 } 1072 case MarkupDeclarationOpenState: { 1161 END_STATE() 1162 1163 BEGIN_STATE(MarkupDeclarationOpenState) { 1073 1164 DEFINE_STATIC_LOCAL(String, dashDashString, ("--")); 1074 1165 DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype")); … … 1096 1187 RECONSUME_IN(BogusCommentState); 1097 1188 } 1098 case CommentStartState: { 1189 END_STATE() 1190 1191 BEGIN_STATE(CommentStartState) { 1099 1192 if (cc == '-') 
1100 1193 m_state = CommentStartDashState; … … 1110 1203 break; 1111 1204 } 1112 case CommentStartDashState: { 1205 END_STATE() 1206 1207 BEGIN_STATE(CommentStartDashState) { 1113 1208 if (cc == '-') 1114 1209 m_state = CommentEndState; … … 1125 1220 break; 1126 1221 } 1127 case CommentState: { 1222 END_STATE() 1223 1224 BEGIN_STATE(CommentState) { 1128 1225 if (cc == '-') 1129 1226 m_state = CommentEndDashState; … … 1133 1230 break; 1134 1231 } 1135 case CommentEndDashState: { 1232 END_STATE() 1233 1234 BEGIN_STATE(CommentEndDashState) { 1136 1235 if (cc == '-') 1137 1236 m_state = CommentEndState; … … 1144 1243 break; 1145 1244 } 1146 case CommentEndState: { 1245 END_STATE() 1246 1247 BEGIN_STATE(CommentEndState) { 1147 1248 if (cc == '>') { 1148 1249 emitCurrentToken(); … … 1171 1272 break; 1172 1273 } 1173 case CommentEndBangState: { 1274 END_STATE() 1275 1276 BEGIN_STATE(CommentEndBangState) { 1174 1277 if (cc == '-') { 1175 1278 m_token->appendToComment('-'); … … 1190 1293 break; 1191 1294 } 1192 case CommentEndSpaceState: { 1295 END_STATE() 1296 1297 BEGIN_STATE(CommentEndSpaceState) { 1193 1298 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1194 1299 m_token->appendToComment(cc); … … 1205 1310 break; 1206 1311 } 1207 case DOCTYPEState: { 1312 END_STATE() 1313 1314 BEGIN_STATE(DOCTYPEState) { 1208 1315 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1209 1316 m_state = BeforeDOCTYPENameState; … … 1215 1322 break; 1216 1323 } 1217 case BeforeDOCTYPENameState: { 1324 END_STATE() 1325 1326 BEGIN_STATE(BeforeDOCTYPENameState) { 1218 1327 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1219 1328 break; … … 1234 1343 break; 1235 1344 } 1236 case DOCTYPENameState: { 1345 END_STATE() 1346 1347 BEGIN_STATE(DOCTYPENameState) { 1237 1348 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1238 1349 m_state = AfterDOCTYPENameState; … … 1247 1358 break; 1248 1359 } 1249 case AfterDOCTYPENameState: { 1360 
END_STATE() 1361 1362 BEGIN_STATE(AfterDOCTYPENameState) { 1250 1363 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1251 1364 break; … … 1278 1391 break; 1279 1392 } 1280 case AfterDOCTYPEPublicKeywordState: { 1393 END_STATE() 1394 1395 BEGIN_STATE(AfterDOCTYPEPublicKeywordState) { 1281 1396 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1282 1397 m_state = BeforeDOCTYPEPublicIdentifierState; … … 1302 1417 break; 1303 1418 } 1304 case BeforeDOCTYPEPublicIdentifierState: { 1419 END_STATE() 1420 1421 BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) { 1305 1422 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1306 1423 break; … … 1324 1441 break; 1325 1442 } 1326 case DOCTYPEPublicIdentifierDoubleQuotedState: { 1443 END_STATE() 1444 1445 BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) { 1327 1446 if (cc == '"') 1328 1447 m_state = AfterDOCTYPEPublicIdentifierState; … … 1337 1456 break; 1338 1457 } 1339 case DOCTYPEPublicIdentifierSingleQuotedState: { 1458 END_STATE() 1459 1460 BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) { 1340 1461 if (cc == '\'') 1341 1462 m_state = AfterDOCTYPEPublicIdentifierState; … … 1350 1471 break; 1351 1472 } 1352 case AfterDOCTYPEPublicIdentifierState: { 1473 END_STATE() 1474 1475 BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) { 1353 1476 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1354 1477 m_state = BetweenDOCTYPEPublicAndSystemIdentifiersState; … … 1372 1495 break; 1373 1496 } 1374 case BetweenDOCTYPEPublicAndSystemIdentifiersState: { 1497 END_STATE() 1498 1499 BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) { 1375 1500 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1376 1501 m_state = BetweenDOCTYPEPublicAndSystemIdentifiersState; … … 1392 1517 break; 1393 1518 } 1394 case AfterDOCTYPESystemKeywordState: { 1519 END_STATE() 1520 1521 BEGIN_STATE(AfterDOCTYPESystemKeywordState) { 1395 1522 if (cc == '\x09' || cc == '\x0A' || cc == 
'\x0C' || cc == ' ') 1396 1523 m_state = BeforeDOCTYPESystemIdentifierState; … … 1416 1543 break; 1417 1544 } 1418 case BeforeDOCTYPESystemIdentifierState: { 1545 END_STATE() 1546 1547 BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) { 1419 1548 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1420 1549 break; … … 1438 1567 break; 1439 1568 } 1440 case DOCTYPESystemIdentifierDoubleQuotedState: { 1569 END_STATE() 1570 1571 BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) { 1441 1572 if (cc == '"') 1442 1573 m_state = AfterDOCTYPESystemIdentifierState; … … 1451 1582 break; 1452 1583 } 1453 case DOCTYPESystemIdentifierSingleQuotedState: { 1584 END_STATE() 1585 1586 BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) { 1454 1587 if (cc == '\'') 1455 1588 m_state = AfterDOCTYPESystemIdentifierState; … … 1464 1597 break; 1465 1598 } 1466 case AfterDOCTYPESystemIdentifierState: { 1599 END_STATE() 1600 1601 BEGIN_STATE(AfterDOCTYPESystemIdentifierState) { 1467 1602 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1468 1603 break; … … 1477 1612 break; 1478 1613 } 1479 case BogusDOCTYPEState: { 1614 END_STATE() 1615 1616 BEGIN_STATE(BogusDOCTYPEState) { 1480 1617 if (cc == '>') { 1481 1618 emitCurrentToken(); … … 1485 1622 break; 1486 1623 } 1487 case CDATASectionState: { 1624 END_STATE() 1625 1626 BEGIN_STATE(CDATASectionState) { 1488 1627 notImplemented(); 1489 1628 break; 1490 1629 } 1630 END_STATE() 1631 1491 1632 } 1492 1633 source.advance(m_lineNumber);
Note: See TracChangeset
for help on using the changeset viewer.