Changeset 60704 in webkit
- Timestamp:
- Jun 4, 2010 1:06:20 PM (14 years ago)
- Location:
- trunk/WebCore
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/WebCore/ChangeLog
r60703 r60704 1 2010-06-04 Adam Barth <abarth@webkit.org> 2 3 Reviewed by Eric Seidel. 4 5 Make HTML5Lexer go fast 6 https://bugs.webkit.org/show_bug.cgi?id=40048 7 8 Change the indent of the loop to match what it's going to be once we 9 remove the loop. This is a whitespace only change that will make the 10 final diff much, much smaller. Sorry for the transient ugly style. 11 12 * html/HTML5Lexer.cpp: 13 (WebCore::HTML5Lexer::nextToken): 14 1 15 2010-06-04 Adam Barth <abarth@webkit.org> 2 16 -
trunk/WebCore/html/HTML5Lexer.cpp
r60703 r60704 375 375 // FIXME: This while should stop as soon as we have a token to return. 376 376 while (!source.isEmpty()) { 377 UChar cc = *source; 378 switch (m_state) { 379 BEGIN_STATE(DataState) { 380 if (cc == '&') 381 ADVANCE_TO(CharacterReferenceInDataState); 382 else if (cc == '<') { 383 if (m_token->type() == HTML5Token::Character) { 384 // We have a bunch of character tokens queued up that we 385 // are emitting lazily here. 386 return true; 377 // FIXME: This is a purposeful style violation because this while loop is 378 // going to be removed soon. 379 380 UChar cc = *source; 381 switch (m_state) { 382 BEGIN_STATE(DataState) { 383 if (cc == '&') 384 ADVANCE_TO(CharacterReferenceInDataState); 385 else if (cc == '<') { 386 if (m_token->type() == HTML5Token::Character) { 387 // We have a bunch of character tokens queued up that we 388 // are emitting lazily here. 389 return true; 390 } 391 ADVANCE_TO(TagOpenState); 392 } else 393 emitCharacter(cc); 394 break; 395 } 396 END_STATE() 397 398 BEGIN_STATE(CharacterReferenceInDataState) { 399 if (!processEntity(source)) 400 return shouldEmitBufferedCharacterToken(source); 401 RECONSUME_IN(DataState); 402 } 403 END_STATE() 404 405 BEGIN_STATE(RCDATAState) { 406 if (cc == '&') 407 ADVANCE_TO(CharacterReferenceInRCDATAState); 408 else if (cc == '<') 409 ADVANCE_TO(RCDATALessThanSignState); 410 else 411 emitCharacter(cc); 412 break; 413 } 414 END_STATE() 415 416 BEGIN_STATE(CharacterReferenceInRCDATAState) { 417 if (!processEntity(source)) 418 return shouldEmitBufferedCharacterToken(source); 419 RECONSUME_IN(RCDATAState); 420 } 421 END_STATE() 422 423 BEGIN_STATE(RAWTEXTState) { 424 if (cc == '<') 425 ADVANCE_TO(RAWTEXTLessThanSignState); 426 else 427 emitCharacter(cc); 428 break; 429 } 430 END_STATE() 431 432 BEGIN_STATE(ScriptDataState) { 433 if (cc == '<') 434 ADVANCE_TO(ScriptDataLessThanSignState); 435 else 436 emitCharacter(cc); 437 break; 438 } 439 END_STATE() 440 441 BEGIN_STATE(PLAINTEXTState) { 442 
emitCharacter(cc); 443 break; 444 } 445 END_STATE() 446 447 BEGIN_STATE(TagOpenState) { 448 if (cc == '!') 449 ADVANCE_TO(MarkupDeclarationOpenState); 450 else if (cc == '/') 451 ADVANCE_TO(EndTagOpenState); 452 else if (cc >= 'A' && cc <= 'Z') { 453 m_token->beginStartTag(toLowerCase(cc)); 454 ADVANCE_TO(TagNameState); 455 } else if (cc >= 'a' && cc <= 'z') { 456 m_token->beginStartTag(cc); 457 ADVANCE_TO(TagNameState); 458 } else if (cc == '?') { 459 emitParseError(); 460 // The spec consumes the current character before switching 461 // to the bogus comment state, but it's easier to implement 462 // if we reconsume the current character. 463 RECONSUME_IN(BogusCommentState); 464 } else { 465 emitParseError(); 466 emitCharacter('<'); 467 RECONSUME_IN(DataState); 468 } 469 break; 470 } 471 END_STATE() 472 473 BEGIN_STATE(EndTagOpenState) { 474 if (cc >= 'A' && cc <= 'Z') { 475 m_token->beginEndTag(toLowerCase(cc)); 476 ADVANCE_TO(TagNameState); 477 } else if (cc >= 'a' && cc <= 'z') { 478 m_token->beginEndTag(cc); 479 ADVANCE_TO(TagNameState); 480 } else if (cc == '>') { 481 emitParseError(); 482 ADVANCE_TO(DataState); 483 } else { 484 emitParseError(); 485 RECONSUME_IN(BogusCommentState); 486 } 487 // FIXME: Handle EOF properly. 488 break; 489 } 490 END_STATE() 491 492 BEGIN_STATE(TagNameState) { 493 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 494 ADVANCE_TO(BeforeAttributeNameState); 495 else if (cc == '/') 496 ADVANCE_TO(SelfClosingStartTagState); 497 else if (cc == '>') { 498 EMIT_AND_RESUME_IN(DataState); 499 } else if (cc >= 'A' && cc <= 'Z') 500 m_token->appendToName(toLowerCase(cc)); 501 else 502 m_token->appendToName(cc); 503 // FIXME: Handle EOF properly. 
504 break; 505 } 506 END_STATE() 507 508 BEGIN_STATE(RCDATALessThanSignState) { 509 if (cc == '/') { 510 m_temporaryBuffer.clear(); 511 ASSERT(m_bufferedEndTagName.isEmpty()); 512 ADVANCE_TO(RCDATAEndTagOpenState); 513 } else { 514 emitCharacter('<'); 515 RECONSUME_IN(RCDATAState); 516 } 517 break; 518 } 519 END_STATE() 520 521 BEGIN_STATE(RCDATAEndTagOpenState) { 522 if (cc >= 'A' && cc <= 'Z') { 523 m_temporaryBuffer.append(cc); 524 addToPossibleEndTag(toLowerCase(cc)); 525 ADVANCE_TO(RCDATAEndTagNameState); 526 } else if (cc >= 'a' && cc <= 'z') { 527 m_temporaryBuffer.append(cc); 528 addToPossibleEndTag(cc); 529 ADVANCE_TO(RCDATAEndTagNameState); 530 } else { 531 emitCharacter('<'); 532 emitCharacter('/'); 533 RECONSUME_IN(RCDATAState); 534 } 535 break; 536 } 537 END_STATE() 538 539 BEGIN_STATE(RCDATAEndTagNameState) { 540 if (cc >= 'A' && cc <= 'Z') { 541 m_temporaryBuffer.append(cc); 542 addToPossibleEndTag(toLowerCase(cc)); 543 } else if (cc >= 'a' && cc <= 'z') { 544 m_temporaryBuffer.append(cc); 545 addToPossibleEndTag(cc); 546 } else { 547 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') { 548 if (isAppropriateEndTag()) { 549 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); 387 550 } 388 ADVANCE_TO(TagOpenState); 389 } else 390 emitCharacter(cc); 551 } else if (cc == '/') { 552 if (isAppropriateEndTag()) { 553 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); 554 } 555 } else if (cc == '>') { 556 if (isAppropriateEndTag()) { 557 FLUSH_EMIT_AND_RESUME_IN(DataState); 558 } 559 } 560 emitCharacter('<'); 561 emitCharacter('/'); 562 m_token->appendToCharacter(m_temporaryBuffer); 563 m_bufferedEndTagName.clear(); 564 RECONSUME_IN(RCDATAState); 565 } 566 break; 567 } 568 END_STATE() 569 570 BEGIN_STATE(RAWTEXTLessThanSignState) { 571 if (cc == '/') { 572 m_temporaryBuffer.clear(); 573 ASSERT(m_bufferedEndTagName.isEmpty()); 574 ADVANCE_TO(RAWTEXTEndTagOpenState); 575 } else { 576 emitCharacter('<'); 577 RECONSUME_IN(RAWTEXTState); 578 } 579 break; 580 
} 581 END_STATE() 582 583 BEGIN_STATE(RAWTEXTEndTagOpenState) { 584 if (cc >= 'A' && cc <= 'Z') { 585 m_temporaryBuffer.append(cc); 586 addToPossibleEndTag(toLowerCase(cc)); 587 ADVANCE_TO(RAWTEXTEndTagNameState); 588 } else if (cc >= 'a' && cc <= 'z') { 589 m_temporaryBuffer.append(cc); 590 addToPossibleEndTag(cc); 591 ADVANCE_TO(RAWTEXTEndTagNameState); 592 } else { 593 emitCharacter('<'); 594 emitCharacter('/'); 595 RECONSUME_IN(RAWTEXTState); 596 } 597 break; 598 } 599 END_STATE() 600 601 BEGIN_STATE(RAWTEXTEndTagNameState) { 602 if (cc >= 'A' && cc <= 'Z') { 603 m_temporaryBuffer.append(cc); 604 addToPossibleEndTag(toLowerCase(cc)); 605 } else if (cc >= 'a' && cc <= 'z') { 606 m_temporaryBuffer.append(cc); 607 addToPossibleEndTag(cc); 608 } else { 609 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') { 610 if (isAppropriateEndTag()) { 611 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); 612 } 613 } else if (cc == '/') { 614 if (isAppropriateEndTag()) { 615 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); 616 } 617 } else if (cc == '>') { 618 if (isAppropriateEndTag()) { 619 FLUSH_EMIT_AND_RESUME_IN(DataState); 620 } 621 } 622 emitCharacter('<'); 623 emitCharacter('/'); 624 m_token->appendToCharacter(m_temporaryBuffer); 625 m_bufferedEndTagName.clear(); 626 RECONSUME_IN(RAWTEXTState); 627 } 628 break; 629 } 630 END_STATE() 631 632 BEGIN_STATE(ScriptDataLessThanSignState) { 633 if (cc == '/') { 634 m_temporaryBuffer.clear(); 635 ASSERT(m_bufferedEndTagName.isEmpty()); 636 ADVANCE_TO(ScriptDataEndTagOpenState); 637 } else if (cc == '!') { 638 emitCharacter('<'); 639 emitCharacter('!'); 640 ADVANCE_TO(ScriptDataEscapeStartState); 641 } else { 642 emitCharacter('<'); 643 RECONSUME_IN(ScriptDataState); 644 } 645 break; 646 } 647 END_STATE() 648 649 BEGIN_STATE(ScriptDataEndTagOpenState) { 650 if (cc >= 'A' && cc <= 'Z') { 651 m_temporaryBuffer.append(cc); 652 addToPossibleEndTag(toLowerCase(cc)); 653 ADVANCE_TO(ScriptDataEndTagNameState); 654 } else if 
(cc >= 'a' && cc <= 'z') { 655 m_temporaryBuffer.append(cc); 656 addToPossibleEndTag(cc); 657 ADVANCE_TO(ScriptDataEndTagNameState); 658 } else { 659 emitCharacter('<'); 660 emitCharacter('/'); 661 RECONSUME_IN(ScriptDataState); 662 } 663 break; 664 } 665 END_STATE() 666 667 BEGIN_STATE(ScriptDataEndTagNameState) { 668 if (cc >= 'A' && cc <= 'Z') { 669 m_temporaryBuffer.append(cc); 670 addToPossibleEndTag(toLowerCase(cc)); 671 } else if (cc >= 'a' && cc <= 'z') { 672 m_temporaryBuffer.append(cc); 673 addToPossibleEndTag(cc); 674 } else { 675 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') { 676 if (isAppropriateEndTag()) { 677 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); 678 } 679 } else if (cc == '/') { 680 if (isAppropriateEndTag()) { 681 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); 682 } 683 } else if (cc == '>') { 684 if (isAppropriateEndTag()) { 685 FLUSH_EMIT_AND_RESUME_IN(DataState); 686 } 687 } 688 emitCharacter('<'); 689 emitCharacter('/'); 690 m_token->appendToCharacter(m_temporaryBuffer); 691 m_bufferedEndTagName.clear(); 692 RECONSUME_IN(ScriptDataState); 693 } 694 break; 695 } 696 END_STATE() 697 698 BEGIN_STATE(ScriptDataEscapeStartState) { 699 if (cc == '-') { 700 emitCharacter(cc); 701 ADVANCE_TO(ScriptDataEscapeStartDashState); 702 } else { 703 RECONSUME_IN(ScriptDataState); 704 } 705 break; 706 } 707 END_STATE() 708 709 BEGIN_STATE(ScriptDataEscapeStartDashState) { 710 if (cc == '-') { 711 emitCharacter(cc); 712 ADVANCE_TO(ScriptDataEscapedDashDashState); 713 } else { 714 RECONSUME_IN(ScriptDataState); 715 } 716 break; 717 } 718 END_STATE() 719 720 BEGIN_STATE(ScriptDataEscapedState) { 721 if (cc == '-') { 722 emitCharacter(cc); 723 ADVANCE_TO(ScriptDataEscapedDashState); 724 } else if (cc == '<') 725 ADVANCE_TO(ScriptDataEscapedLessThanSignState); 726 else 727 emitCharacter(cc); 728 // FIXME: Handle EOF properly. 
729 break; 730 } 731 END_STATE() 732 733 BEGIN_STATE(ScriptDataEscapedDashState) { 734 if (cc == '-') { 735 emitCharacter(cc); 736 ADVANCE_TO(ScriptDataEscapedDashDashState); 737 } else if (cc == '<') 738 ADVANCE_TO(ScriptDataEscapedLessThanSignState); 739 else { 740 emitCharacter(cc); 741 ADVANCE_TO(ScriptDataEscapedState); 742 } 743 // FIXME: Handle EOF properly. 744 break; 745 } 746 END_STATE() 747 748 BEGIN_STATE(ScriptDataEscapedDashDashState) { 749 if (cc == '-') 750 emitCharacter(cc); 751 else if (cc == '<') 752 ADVANCE_TO(ScriptDataEscapedLessThanSignState); 753 else if (cc == '>') { 754 emitCharacter(cc); 755 ADVANCE_TO(ScriptDataState); 756 } else { 757 emitCharacter(cc); 758 ADVANCE_TO(ScriptDataEscapedState); 759 } 760 // FIXME: Handle EOF properly. 761 break; 762 } 763 END_STATE() 764 765 BEGIN_STATE(ScriptDataEscapedLessThanSignState) { 766 if (cc == '/') { 767 m_temporaryBuffer.clear(); 768 ASSERT(m_bufferedEndTagName.isEmpty()); 769 ADVANCE_TO(ScriptDataEscapedEndTagOpenState); 770 } else if (cc >= 'A' && cc <= 'Z') { 771 emitCharacter('<'); 772 emitCharacter(cc); 773 m_temporaryBuffer.clear(); 774 m_temporaryBuffer.append(toLowerCase(cc)); 775 ADVANCE_TO(ScriptDataDoubleEscapeStartState); 776 } else if (cc >= 'a' && cc <= 'z') { 777 emitCharacter('<'); 778 emitCharacter(cc); 779 m_temporaryBuffer.clear(); 780 m_temporaryBuffer.append(cc); 781 ADVANCE_TO(ScriptDataDoubleEscapeStartState); 782 } else { 783 emitCharacter('<'); 784 RECONSUME_IN(ScriptDataEscapedState); 785 } 786 break; 787 } 788 END_STATE() 789 790 BEGIN_STATE(ScriptDataEscapedEndTagOpenState) { 791 if (cc >= 'A' && cc <= 'Z') { 792 m_temporaryBuffer.append(cc); 793 addToPossibleEndTag(toLowerCase(cc)); 794 ADVANCE_TO(ScriptDataEscapedEndTagNameState); 795 } else if (cc >= 'a' && cc <= 'z') { 796 m_temporaryBuffer.append(cc); 797 addToPossibleEndTag(cc); 798 ADVANCE_TO(ScriptDataEscapedEndTagNameState); 799 } else { 800 emitCharacter('<'); 801 emitCharacter('/'); 802 
RECONSUME_IN(ScriptDataEscapedState); 803 } 804 break; 805 } 806 END_STATE() 807 808 BEGIN_STATE(ScriptDataEscapedEndTagNameState) { 809 if (cc >= 'A' && cc <= 'Z') { 810 m_temporaryBuffer.append(cc); 811 addToPossibleEndTag(toLowerCase(cc)); 812 } else if (cc >= 'a' && cc <= 'z') { 813 m_temporaryBuffer.append(cc); 814 addToPossibleEndTag(cc); 815 } else { 816 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') { 817 if (isAppropriateEndTag()) { 818 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); 819 } 820 } else if (cc == '/') { 821 if (isAppropriateEndTag()) { 822 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); 823 } 824 } else if (cc == '>') { 825 if (isAppropriateEndTag()) { 826 FLUSH_EMIT_AND_RESUME_IN(DataState); 827 } 828 } 829 emitCharacter('<'); 830 emitCharacter('/'); 831 m_token->appendToCharacter(m_temporaryBuffer); 832 m_bufferedEndTagName.clear(); 833 RECONSUME_IN(ScriptDataEscapedState); 834 } 835 break; 836 } 837 END_STATE() 838 839 BEGIN_STATE(ScriptDataDoubleEscapeStartState) { 840 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '/' || cc == '>') { 841 emitCharacter(cc); 842 if (temporaryBufferIs(scriptTag.localName())) 843 ADVANCE_TO(ScriptDataDoubleEscapedState); 844 else 845 ADVANCE_TO(ScriptDataEscapedState); 846 } else if (cc >= 'A' && cc <= 'Z') { 847 emitCharacter(cc); 848 m_temporaryBuffer.append(toLowerCase(cc)); 849 } else if (cc >= 'a' && cc <= 'z') { 850 emitCharacter(cc); 851 m_temporaryBuffer.append(cc); 852 } else { 853 RECONSUME_IN(ScriptDataEscapedState); 854 } 855 break; 856 } 857 END_STATE() 858 859 BEGIN_STATE(ScriptDataDoubleEscapedState) { 860 if (cc == '-') { 861 emitCharacter(cc); 862 ADVANCE_TO(ScriptDataDoubleEscapedDashState); 863 } else if (cc == '<') { 864 emitCharacter(cc); 865 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); 866 } else 867 emitCharacter(cc); 868 // FIXME: Handle EOF properly. 
869 break; 870 } 871 END_STATE() 872 873 BEGIN_STATE(ScriptDataDoubleEscapedDashState) { 874 if (cc == '-') { 875 emitCharacter(cc); 876 ADVANCE_TO(ScriptDataDoubleEscapedDashDashState); 877 } else if (cc == '<') { 878 emitCharacter(cc); 879 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); 880 } else { 881 emitCharacter(cc); 882 ADVANCE_TO(ScriptDataDoubleEscapedState); 883 } 884 // FIXME: Handle EOF properly. 885 break; 886 } 887 END_STATE() 888 889 BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) { 890 if (cc == '-') 891 emitCharacter(cc); 892 else if (cc == '<') { 893 emitCharacter(cc); 894 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); 895 } else if (cc == '>') { 896 emitCharacter(cc); 897 ADVANCE_TO(ScriptDataState); 898 } else { 899 emitCharacter(cc); 900 ADVANCE_TO(ScriptDataDoubleEscapedState); 901 } 902 // FIXME: Handle EOF properly. 903 break; 904 } 905 END_STATE() 906 907 BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) { 908 if (cc == '/') { 909 emitCharacter(cc); 910 m_temporaryBuffer.clear(); 911 ADVANCE_TO(ScriptDataDoubleEscapeEndState); 912 } else { 913 RECONSUME_IN(ScriptDataDoubleEscapedState); 914 } 915 break; 916 } 917 END_STATE() 918 919 BEGIN_STATE(ScriptDataDoubleEscapeEndState) { 920 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '/' || cc == '>') { 921 emitCharacter(cc); 922 if (temporaryBufferIs(scriptTag.localName())) 923 ADVANCE_TO(ScriptDataEscapedState); 924 else 925 ADVANCE_TO(ScriptDataDoubleEscapedState); 926 } else if (cc >= 'A' && cc <= 'Z') { 927 emitCharacter(cc); 928 m_temporaryBuffer.append(toLowerCase(cc)); 929 } else if (cc >= 'a' && cc <= 'z') { 930 emitCharacter(cc); 931 m_temporaryBuffer.append(cc); 932 } else { 933 RECONSUME_IN(ScriptDataDoubleEscapedState); 934 } 935 break; 936 } 937 END_STATE() 938 939 BEGIN_STATE(BeforeAttributeNameState) { 940 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 391 941 break; 392 } 393 END_STATE() 394 395 
BEGIN_STATE(CharacterReferenceInDataState) { 396 if (!processEntity(source)) 942 else if (cc == '/') 943 ADVANCE_TO(SelfClosingStartTagState); 944 else if (cc == '>') { 945 EMIT_AND_RESUME_IN(DataState); 946 } else if (cc >= 'A' && cc <= 'Z') { 947 m_token->addNewAttribute(); 948 m_token->appendToAttributeName(toLowerCase(cc)); 949 ADVANCE_TO(AttributeNameState); 950 } else { 951 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') 952 emitParseError(); 953 m_token->addNewAttribute(); 954 m_token->appendToAttributeName(cc); 955 ADVANCE_TO(AttributeNameState); 956 } 957 // FIXME: Handle EOF properly. 958 break; 959 } 960 END_STATE() 961 962 BEGIN_STATE(AttributeNameState) { 963 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 964 ADVANCE_TO(AfterAttributeNameState); 965 else if (cc == '/') 966 ADVANCE_TO(SelfClosingStartTagState); 967 else if (cc == '=') 968 ADVANCE_TO(BeforeAttributeValueState); 969 else if (cc == '>') { 970 EMIT_AND_RESUME_IN(DataState); 971 } else if (cc >= 'A' && cc <= 'Z') 972 m_token->appendToAttributeName(toLowerCase(cc)); 973 else { 974 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') 975 emitParseError(); 976 m_token->appendToAttributeName(cc); 977 ADVANCE_TO(AttributeNameState); 978 } 979 // FIXME: Handle EOF properly. 
980 break; 981 } 982 END_STATE() 983 984 BEGIN_STATE(AfterAttributeNameState) { 985 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 986 break; 987 else if (cc == '/') 988 ADVANCE_TO(SelfClosingStartTagState); 989 else if (cc == '=') 990 ADVANCE_TO(BeforeAttributeValueState); 991 else if (cc == '=') { 992 EMIT_AND_RESUME_IN(DataState); 993 } else if (cc >= 'A' && cc <= 'Z') { 994 m_token->addNewAttribute(); 995 m_token->appendToAttributeName(toLowerCase(cc)); 996 ADVANCE_TO(AttributeNameState); 997 } else { 998 if (cc == '"' || cc == '\'' || cc == '<') 999 emitParseError(); 1000 m_token->addNewAttribute(); 1001 m_token->appendToAttributeName(cc); 1002 ADVANCE_TO(AttributeNameState); 1003 } 1004 // FIXME: Handle EOF properly. 1005 break; 1006 } 1007 END_STATE() 1008 1009 BEGIN_STATE(BeforeAttributeValueState) { 1010 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1011 break; 1012 else if (cc == '"') 1013 ADVANCE_TO(AttributeValueDoubleQuotedState); 1014 else if (cc == '&') { 1015 RECONSUME_IN(AttributeValueUnquotedState); 1016 } else if (cc == '\'') 1017 ADVANCE_TO(AttributeValueSingleQuotedState); 1018 else if (cc == '>') { 1019 emitParseError(); 1020 EMIT_AND_RESUME_IN(DataState); 1021 } else { 1022 if (cc == '<' || cc == '=' || cc == '`') 1023 emitParseError(); 1024 m_token->appendToAttributeValue(cc); 1025 ADVANCE_TO(AttributeValueUnquotedState); 1026 } 1027 break; 1028 } 1029 END_STATE() 1030 1031 BEGIN_STATE(AttributeValueDoubleQuotedState) { 1032 if (cc == '"') 1033 ADVANCE_TO(AfterAttributeValueQuotedState); 1034 else if (cc == '&') { 1035 m_additionalAllowedCharacter = '"'; 1036 ADVANCE_TO(CharacterReferenceInAttributeValueState); 1037 } else 1038 m_token->appendToAttributeValue(cc); 1039 // FIXME: Handle EOF properly. 
1040 break; 1041 } 1042 END_STATE() 1043 1044 BEGIN_STATE(AttributeValueSingleQuotedState) { 1045 if (cc == '\'') 1046 ADVANCE_TO(AfterAttributeValueQuotedState); 1047 else if (cc == '&') { 1048 m_additionalAllowedCharacter = '\''; 1049 ADVANCE_TO(CharacterReferenceInAttributeValueState); 1050 } else 1051 m_token->appendToAttributeValue(cc); 1052 // FIXME: Handle EOF properly. 1053 break; 1054 } 1055 END_STATE() 1056 1057 BEGIN_STATE(AttributeValueUnquotedState) { 1058 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1059 ADVANCE_TO(BeforeAttributeNameState); 1060 else if (cc == '&') { 1061 m_additionalAllowedCharacter = '>'; 1062 ADVANCE_TO(CharacterReferenceInAttributeValueState); 1063 } else if (cc == '>') { 1064 EMIT_AND_RESUME_IN(DataState); 1065 } else { 1066 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`') 1067 emitParseError(); 1068 m_token->appendToAttributeValue(cc); 1069 } 1070 // FIXME: Handle EOF properly. 1071 break; 1072 } 1073 END_STATE() 1074 1075 BEGIN_STATE(CharacterReferenceInAttributeValueState) { 1076 bool notEnoughCharacters = false; 1077 unsigned value = consumeEntity(source, notEnoughCharacters); 1078 if (notEnoughCharacters) 1079 return shouldEmitBufferedCharacterToken(source); 1080 if (!value) 1081 m_token->appendToAttributeValue('&'); 1082 else if (value < 0xFFFF) 1083 m_token->appendToAttributeValue(value); 1084 else { 1085 m_token->appendToAttributeValue(U16_LEAD(value)); 1086 m_token->appendToAttributeValue(U16_TRAIL(value)); 1087 } 1088 // We're supposed to switch back to the attribute value state that 1089 // we were in when we were switched into this state. Rather than 1090 // keeping track of this explictly, we observe that the previous 1091 // state can be determined by m_additionalAllowedCharacter. 
1092 if (m_additionalAllowedCharacter == '"') 1093 RECONSUME_IN(AttributeValueDoubleQuotedState) 1094 else if (m_additionalAllowedCharacter == '\'') 1095 RECONSUME_IN(AttributeValueSingleQuotedState) 1096 else if (m_additionalAllowedCharacter == '>') 1097 RECONSUME_IN(AttributeValueUnquotedState) 1098 else 1099 ASSERT_NOT_REACHED(); 1100 break; 1101 } 1102 END_STATE() 1103 1104 BEGIN_STATE(AfterAttributeValueQuotedState) { 1105 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1106 ADVANCE_TO(BeforeAttributeNameState); 1107 else if (cc == '/') 1108 ADVANCE_TO(SelfClosingStartTagState); 1109 else if (cc == '>') { 1110 EMIT_AND_RESUME_IN(DataState); 1111 } else { 1112 emitParseError(); 1113 RECONSUME_IN(BeforeAttributeNameState); 1114 } 1115 // FIXME: Handle EOF properly. 1116 break; 1117 } 1118 END_STATE() 1119 1120 BEGIN_STATE(SelfClosingStartTagState) { 1121 if (cc == '>') { 1122 notImplemented(); 1123 EMIT_AND_RESUME_IN(DataState); 1124 } else { 1125 emitParseError(); 1126 RECONSUME_IN(BeforeAttributeNameState); 1127 } 1128 // FIXME: Handle EOF properly. 1129 break; 1130 } 1131 END_STATE() 1132 1133 BEGIN_STATE(BogusCommentState) { 1134 m_token->beginComment(); 1135 while (!source.isEmpty()) { 1136 cc = *source; 1137 if (cc == '>') 1138 break; 1139 m_token->appendToComment(cc); 1140 source.advance(m_lineNumber); 1141 } 1142 EMIT_AND_RESUME_IN(DataState); 1143 if (source.isEmpty()) 1144 return true; 1145 // FIXME: Handle EOF properly. 
1146 break; 1147 } 1148 END_STATE() 1149 1150 BEGIN_STATE(MarkupDeclarationOpenState) { 1151 DEFINE_STATIC_LOCAL(String, dashDashString, ("--")); 1152 DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype")); 1153 if (cc == '-') { 1154 SegmentedString::LookAheadResult result = source.lookAhead(dashDashString); 1155 if (result == SegmentedString::DidMatch) { 1156 source.advanceAndASSERT('-'); 1157 source.advanceAndASSERT('-'); 1158 m_token->beginComment(); 1159 RECONSUME_IN(CommentStartState); 1160 } else if (result == SegmentedString::NotEnoughCharacters) 397 1161 return shouldEmitBufferedCharacterToken(source); 398 RECONSUME_IN(DataState); 399 } 400 END_STATE() 401 402 BEGIN_STATE(RCDATAState) { 403 if (cc == '&') 404 ADVANCE_TO(CharacterReferenceInRCDATAState); 405 else if (cc == '<') 406 ADVANCE_TO(RCDATALessThanSignState); 407 else 408 emitCharacter(cc); 1162 } else if (cc == 'D' || cc == 'd') { 1163 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString); 1164 if (result == SegmentedString::DidMatch) { 1165 advanceStringAndASSERTIgnoringCase(source, "doctype"); 1166 RECONSUME_IN(DOCTYPEState); 1167 } else if (result == SegmentedString::NotEnoughCharacters) 1168 return shouldEmitBufferedCharacterToken(source); 1169 } 1170 notImplemented(); 1171 // FIXME: We're still missing the bits about the insertion mode being in foreign content: 1172 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#markup-declaration-open-state 1173 emitParseError(); 1174 RECONSUME_IN(BogusCommentState); 1175 } 1176 END_STATE() 1177 1178 BEGIN_STATE(CommentStartState) { 1179 if (cc == '-') 1180 ADVANCE_TO(CommentStartDashState); 1181 else if (cc == '>') { 1182 emitParseError(); 1183 EMIT_AND_RESUME_IN(DataState); 1184 } else { 1185 m_token->appendToComment(cc); 1186 ADVANCE_TO(CommentState); 1187 } 1188 // FIXME: Handle EOF properly. 
1189 break; 1190 } 1191 END_STATE() 1192 1193 BEGIN_STATE(CommentStartDashState) { 1194 if (cc == '-') 1195 ADVANCE_TO(CommentEndState); 1196 else if (cc == '>') { 1197 emitParseError(); 1198 EMIT_AND_RESUME_IN(DataState); 1199 } else { 1200 m_token->appendToComment('-'); 1201 m_token->appendToComment(cc); 1202 ADVANCE_TO(CommentState); 1203 } 1204 // FIXME: Handle EOF properly. 1205 break; 1206 } 1207 END_STATE() 1208 1209 BEGIN_STATE(CommentState) { 1210 if (cc == '-') 1211 ADVANCE_TO(CommentEndDashState); 1212 else 1213 m_token->appendToComment(cc); 1214 // FIXME: Handle EOF properly. 1215 break; 1216 } 1217 END_STATE() 1218 1219 BEGIN_STATE(CommentEndDashState) { 1220 if (cc == '-') 1221 ADVANCE_TO(CommentEndState); 1222 else { 1223 m_token->appendToComment('-'); 1224 m_token->appendToComment(cc); 1225 ADVANCE_TO(CommentState); 1226 } 1227 // FIXME: Handle EOF properly. 1228 break; 1229 } 1230 END_STATE() 1231 1232 BEGIN_STATE(CommentEndState) { 1233 if (cc == '>') { 1234 EMIT_AND_RESUME_IN(DataState); 1235 } else if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') { 1236 emitParseError(); 1237 m_token->appendToComment('-'); 1238 m_token->appendToComment('-'); 1239 m_token->appendToComment(cc); 1240 ADVANCE_TO(CommentEndSpaceState); 1241 } else if (cc == '!') { 1242 emitParseError(); 1243 ADVANCE_TO(CommentEndBangState); 1244 } else if (cc == '-') { 1245 emitParseError(); 1246 m_token->appendToComment('-'); 1247 m_token->appendToComment(cc); 1248 } else { 1249 emitParseError(); 1250 m_token->appendToComment('-'); 1251 m_token->appendToComment('-'); 1252 m_token->appendToComment(cc); 1253 ADVANCE_TO(CommentState); 1254 } 1255 // FIXME: Handle EOF properly. 
1256 break; 1257 } 1258 END_STATE() 1259 1260 BEGIN_STATE(CommentEndBangState) { 1261 if (cc == '-') { 1262 m_token->appendToComment('-'); 1263 m_token->appendToComment('-'); 1264 m_token->appendToComment('!'); 1265 ADVANCE_TO(CommentEndDashState); 1266 } else if (cc == '>') { 1267 EMIT_AND_RESUME_IN(DataState); 1268 } else { 1269 m_token->appendToComment('-'); 1270 m_token->appendToComment('-'); 1271 m_token->appendToComment('!'); 1272 m_token->appendToComment(cc); 1273 ADVANCE_TO(CommentState); 1274 } 1275 // FIXME: Handle EOF properly. 1276 break; 1277 } 1278 END_STATE() 1279 1280 BEGIN_STATE(CommentEndSpaceState) { 1281 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1282 m_token->appendToComment(cc); 1283 else if (cc == '-') 1284 ADVANCE_TO(CommentEndDashState); 1285 else if (cc == '>') { 1286 EMIT_AND_RESUME_IN(DataState); 1287 } else { 1288 m_token->appendToComment(cc); 1289 ADVANCE_TO(CommentState); 1290 } 1291 // FIXME: Handle EOF properly. 1292 break; 1293 } 1294 END_STATE() 1295 1296 BEGIN_STATE(DOCTYPEState) { 1297 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1298 ADVANCE_TO(BeforeDOCTYPENameState); 1299 else { 1300 emitParseError(); 1301 RECONSUME_IN(BeforeDOCTYPENameState); 1302 } 1303 // FIXME: Handle EOF properly. 
1304 break; 1305 } 1306 END_STATE() 1307 1308 BEGIN_STATE(BeforeDOCTYPENameState) { 1309 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 409 1310 break; 410 } 411 END_STATE() 412 413 BEGIN_STATE(CharacterReferenceInRCDATAState) { 414 if (!processEntity(source)) 415 return shouldEmitBufferedCharacterToken(source); 416 RECONSUME_IN(RCDATAState); 417 } 418 END_STATE() 419 420 BEGIN_STATE(RAWTEXTState) { 421 if (cc == '<') 422 ADVANCE_TO(RAWTEXTLessThanSignState); 423 else 424 emitCharacter(cc); 1311 else if (cc >= 'A' && cc <= 'Z') { 1312 m_token->beginDOCTYPE(toLowerCase(cc)); 1313 ADVANCE_TO(DOCTYPENameState); 1314 } else if (cc == '>') { 1315 emitParseError(); 1316 m_token->beginDOCTYPE(); 1317 notImplemented(); 1318 EMIT_AND_RESUME_IN(DataState); 1319 } else { 1320 m_token->beginDOCTYPE(cc); 1321 ADVANCE_TO(DOCTYPENameState); 1322 } 1323 // FIXME: Handle EOF properly. 1324 break; 1325 } 1326 END_STATE() 1327 1328 BEGIN_STATE(DOCTYPENameState) { 1329 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1330 ADVANCE_TO(AfterDOCTYPENameState); 1331 else if (cc == '>') { 1332 EMIT_AND_RESUME_IN(DataState); 1333 } else if (cc >= 'A' && cc <= 'Z') 1334 m_token->appendToName(toLowerCase(cc)); 1335 else 1336 m_token->appendToName(cc); 1337 // FIXME: Handle EOF properly. 
1338 break; 1339 } 1340 END_STATE() 1341 1342 BEGIN_STATE(AfterDOCTYPENameState) { 1343 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 425 1344 break; 426 } 427 END_STATE() 428 429 BEGIN_STATE(ScriptDataState) { 430 if (cc == '<') 431 ADVANCE_TO(ScriptDataLessThanSignState); 432 else 433 emitCharacter(cc); 434 break; 435 } 436 END_STATE() 437 438 BEGIN_STATE(PLAINTEXTState) { 439 emitCharacter(cc); 440 break; 441 } 442 END_STATE() 443 444 BEGIN_STATE(TagOpenState) { 445 if (cc == '!') 446 ADVANCE_TO(MarkupDeclarationOpenState); 447 else if (cc == '/') 448 ADVANCE_TO(EndTagOpenState); 449 else if (cc >= 'A' && cc <= 'Z') { 450 m_token->beginStartTag(toLowerCase(cc)); 451 ADVANCE_TO(TagNameState); 452 } else if (cc >= 'a' && cc <= 'z') { 453 m_token->beginStartTag(cc); 454 ADVANCE_TO(TagNameState); 455 } else if (cc == '?') { 456 emitParseError(); 457 // The spec consumes the current character before switching 458 // to the bogus comment state, but it's easier to implement 459 // if we reconsume the current character. 460 RECONSUME_IN(BogusCommentState); 461 } else { 462 emitParseError(); 463 emitCharacter('<'); 464 RECONSUME_IN(DataState); 465 } 466 break; 467 } 468 END_STATE() 469 470 BEGIN_STATE(EndTagOpenState) { 471 if (cc >= 'A' && cc <= 'Z') { 472 m_token->beginEndTag(toLowerCase(cc)); 473 ADVANCE_TO(TagNameState); 474 } else if (cc >= 'a' && cc <= 'z') { 475 m_token->beginEndTag(cc); 476 ADVANCE_TO(TagNameState); 477 } else if (cc == '>') { 478 emitParseError(); 479 ADVANCE_TO(DataState); 480 } else { 481 emitParseError(); 482 RECONSUME_IN(BogusCommentState); 483 } 484 // FIXME: Handle EOF properly. 
485 break; 486 } 487 END_STATE() 488 489 BEGIN_STATE(TagNameState) { 490 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 491 ADVANCE_TO(BeforeAttributeNameState); 492 else if (cc == '/') 493 ADVANCE_TO(SelfClosingStartTagState); 494 else if (cc == '>') { 495 EMIT_AND_RESUME_IN(DataState); 496 } else if (cc >= 'A' && cc <= 'Z') 497 m_token->appendToName(toLowerCase(cc)); 498 else 499 m_token->appendToName(cc); 500 // FIXME: Handle EOF properly. 501 break; 502 } 503 END_STATE() 504 505 BEGIN_STATE(RCDATALessThanSignState) { 506 if (cc == '/') { 507 m_temporaryBuffer.clear(); 508 ASSERT(m_bufferedEndTagName.isEmpty()); 509 ADVANCE_TO(RCDATAEndTagOpenState); 510 } else { 511 emitCharacter('<'); 512 RECONSUME_IN(RCDATAState); 513 } 514 break; 515 } 516 END_STATE() 517 518 BEGIN_STATE(RCDATAEndTagOpenState) { 519 if (cc >= 'A' && cc <= 'Z') { 520 m_temporaryBuffer.append(cc); 521 addToPossibleEndTag(toLowerCase(cc)); 522 ADVANCE_TO(RCDATAEndTagNameState); 523 } else if (cc >= 'a' && cc <= 'z') { 524 m_temporaryBuffer.append(cc); 525 addToPossibleEndTag(cc); 526 ADVANCE_TO(RCDATAEndTagNameState); 527 } else { 528 emitCharacter('<'); 529 emitCharacter('/'); 530 RECONSUME_IN(RCDATAState); 531 } 532 break; 533 } 534 END_STATE() 535 536 BEGIN_STATE(RCDATAEndTagNameState) { 537 if (cc >= 'A' && cc <= 'Z') { 538 m_temporaryBuffer.append(cc); 539 addToPossibleEndTag(toLowerCase(cc)); 540 } else if (cc >= 'a' && cc <= 'z') { 541 m_temporaryBuffer.append(cc); 542 addToPossibleEndTag(cc); 543 } else { 544 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') { 545 if (isAppropriateEndTag()) { 546 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); 547 } 548 } else if (cc == '/') { 549 if (isAppropriateEndTag()) { 550 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); 551 } 552 } else if (cc == '>') { 553 if (isAppropriateEndTag()) { 554 FLUSH_EMIT_AND_RESUME_IN(DataState); 555 } 556 } 557 emitCharacter('<'); 558 emitCharacter('/'); 559 
m_token->appendToCharacter(m_temporaryBuffer); 560 m_bufferedEndTagName.clear(); 561 RECONSUME_IN(RCDATAState); 562 } 563 break; 564 } 565 END_STATE() 566 567 BEGIN_STATE(RAWTEXTLessThanSignState) { 568 if (cc == '/') { 569 m_temporaryBuffer.clear(); 570 ASSERT(m_bufferedEndTagName.isEmpty()); 571 ADVANCE_TO(RAWTEXTEndTagOpenState); 572 } else { 573 emitCharacter('<'); 574 RECONSUME_IN(RAWTEXTState); 575 } 576 break; 577 } 578 END_STATE() 579 580 BEGIN_STATE(RAWTEXTEndTagOpenState) { 581 if (cc >= 'A' && cc <= 'Z') { 582 m_temporaryBuffer.append(cc); 583 addToPossibleEndTag(toLowerCase(cc)); 584 ADVANCE_TO(RAWTEXTEndTagNameState); 585 } else if (cc >= 'a' && cc <= 'z') { 586 m_temporaryBuffer.append(cc); 587 addToPossibleEndTag(cc); 588 ADVANCE_TO(RAWTEXTEndTagNameState); 589 } else { 590 emitCharacter('<'); 591 emitCharacter('/'); 592 RECONSUME_IN(RAWTEXTState); 593 } 594 break; 595 } 596 END_STATE() 597 598 BEGIN_STATE(RAWTEXTEndTagNameState) { 599 if (cc >= 'A' && cc <= 'Z') { 600 m_temporaryBuffer.append(cc); 601 addToPossibleEndTag(toLowerCase(cc)); 602 } else if (cc >= 'a' && cc <= 'z') { 603 m_temporaryBuffer.append(cc); 604 addToPossibleEndTag(cc); 605 } else { 606 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') { 607 if (isAppropriateEndTag()) { 608 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); 609 } 610 } else if (cc == '/') { 611 if (isAppropriateEndTag()) { 612 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); 613 } 614 } else if (cc == '>') { 615 if (isAppropriateEndTag()) { 616 FLUSH_EMIT_AND_RESUME_IN(DataState); 617 } 618 } 619 emitCharacter('<'); 620 emitCharacter('/'); 621 m_token->appendToCharacter(m_temporaryBuffer); 622 m_bufferedEndTagName.clear(); 623 RECONSUME_IN(RAWTEXTState); 624 } 625 break; 626 } 627 END_STATE() 628 629 BEGIN_STATE(ScriptDataLessThanSignState) { 630 if (cc == '/') { 631 m_temporaryBuffer.clear(); 632 ASSERT(m_bufferedEndTagName.isEmpty()); 633 ADVANCE_TO(ScriptDataEndTagOpenState); 634 } else if (cc == 
'!') { 635 emitCharacter('<'); 636 emitCharacter('!'); 637 ADVANCE_TO(ScriptDataEscapeStartState); 638 } else { 639 emitCharacter('<'); 640 RECONSUME_IN(ScriptDataState); 641 } 642 break; 643 } 644 END_STATE() 645 646 BEGIN_STATE(ScriptDataEndTagOpenState) { 647 if (cc >= 'A' && cc <= 'Z') { 648 m_temporaryBuffer.append(cc); 649 addToPossibleEndTag(toLowerCase(cc)); 650 ADVANCE_TO(ScriptDataEndTagNameState); 651 } else if (cc >= 'a' && cc <= 'z') { 652 m_temporaryBuffer.append(cc); 653 addToPossibleEndTag(cc); 654 ADVANCE_TO(ScriptDataEndTagNameState); 655 } else { 656 emitCharacter('<'); 657 emitCharacter('/'); 658 RECONSUME_IN(ScriptDataState); 659 } 660 break; 661 } 662 END_STATE() 663 664 BEGIN_STATE(ScriptDataEndTagNameState) { 665 if (cc >= 'A' && cc <= 'Z') { 666 m_temporaryBuffer.append(cc); 667 addToPossibleEndTag(toLowerCase(cc)); 668 } else if (cc >= 'a' && cc <= 'z') { 669 m_temporaryBuffer.append(cc); 670 addToPossibleEndTag(cc); 671 } else { 672 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') { 673 if (isAppropriateEndTag()) { 674 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); 675 } 676 } else if (cc == '/') { 677 if (isAppropriateEndTag()) { 678 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); 679 } 680 } else if (cc == '>') { 681 if (isAppropriateEndTag()) { 682 FLUSH_EMIT_AND_RESUME_IN(DataState); 683 } 684 } 685 emitCharacter('<'); 686 emitCharacter('/'); 687 m_token->appendToCharacter(m_temporaryBuffer); 688 m_bufferedEndTagName.clear(); 689 RECONSUME_IN(ScriptDataState); 690 } 691 break; 692 } 693 END_STATE() 694 695 BEGIN_STATE(ScriptDataEscapeStartState) { 696 if (cc == '-') { 697 emitCharacter(cc); 698 ADVANCE_TO(ScriptDataEscapeStartDashState); 699 } else { 700 RECONSUME_IN(ScriptDataState); 701 } 702 break; 703 } 704 END_STATE() 705 706 BEGIN_STATE(ScriptDataEscapeStartDashState) { 707 if (cc == '-') { 708 emitCharacter(cc); 709 ADVANCE_TO(ScriptDataEscapedDashDashState); 710 } else { 711 RECONSUME_IN(ScriptDataState); 712 } 
713 break; 714 } 715 END_STATE() 716 717 BEGIN_STATE(ScriptDataEscapedState) { 718 if (cc == '-') { 719 emitCharacter(cc); 720 ADVANCE_TO(ScriptDataEscapedDashState); 721 } else if (cc == '<') 722 ADVANCE_TO(ScriptDataEscapedLessThanSignState); 723 else 724 emitCharacter(cc); 725 // FIXME: Handle EOF properly. 726 break; 727 } 728 END_STATE() 729 730 BEGIN_STATE(ScriptDataEscapedDashState) { 731 if (cc == '-') { 732 emitCharacter(cc); 733 ADVANCE_TO(ScriptDataEscapedDashDashState); 734 } else if (cc == '<') 735 ADVANCE_TO(ScriptDataEscapedLessThanSignState); 736 else { 737 emitCharacter(cc); 738 ADVANCE_TO(ScriptDataEscapedState); 739 } 740 // FIXME: Handle EOF properly. 741 break; 742 } 743 END_STATE() 744 745 BEGIN_STATE(ScriptDataEscapedDashDashState) { 746 if (cc == '-') 747 emitCharacter(cc); 748 else if (cc == '<') 749 ADVANCE_TO(ScriptDataEscapedLessThanSignState); 750 else if (cc == '>') { 751 emitCharacter(cc); 752 ADVANCE_TO(ScriptDataState); 753 } else { 754 emitCharacter(cc); 755 ADVANCE_TO(ScriptDataEscapedState); 756 } 757 // FIXME: Handle EOF properly. 
758 break; 759 } 760 END_STATE() 761 762 BEGIN_STATE(ScriptDataEscapedLessThanSignState) { 763 if (cc == '/') { 764 m_temporaryBuffer.clear(); 765 ASSERT(m_bufferedEndTagName.isEmpty()); 766 ADVANCE_TO(ScriptDataEscapedEndTagOpenState); 767 } else if (cc >= 'A' && cc <= 'Z') { 768 emitCharacter('<'); 769 emitCharacter(cc); 770 m_temporaryBuffer.clear(); 771 m_temporaryBuffer.append(toLowerCase(cc)); 772 ADVANCE_TO(ScriptDataDoubleEscapeStartState); 773 } else if (cc >= 'a' && cc <= 'z') { 774 emitCharacter('<'); 775 emitCharacter(cc); 776 m_temporaryBuffer.clear(); 777 m_temporaryBuffer.append(cc); 778 ADVANCE_TO(ScriptDataDoubleEscapeStartState); 779 } else { 780 emitCharacter('<'); 781 RECONSUME_IN(ScriptDataEscapedState); 782 } 783 break; 784 } 785 END_STATE() 786 787 BEGIN_STATE(ScriptDataEscapedEndTagOpenState) { 788 if (cc >= 'A' && cc <= 'Z') { 789 m_temporaryBuffer.append(cc); 790 addToPossibleEndTag(toLowerCase(cc)); 791 ADVANCE_TO(ScriptDataEscapedEndTagNameState); 792 } else if (cc >= 'a' && cc <= 'z') { 793 m_temporaryBuffer.append(cc); 794 addToPossibleEndTag(cc); 795 ADVANCE_TO(ScriptDataEscapedEndTagNameState); 796 } else { 797 emitCharacter('<'); 798 emitCharacter('/'); 799 RECONSUME_IN(ScriptDataEscapedState); 800 } 801 break; 802 } 803 END_STATE() 804 805 BEGIN_STATE(ScriptDataEscapedEndTagNameState) { 806 if (cc >= 'A' && cc <= 'Z') { 807 m_temporaryBuffer.append(cc); 808 addToPossibleEndTag(toLowerCase(cc)); 809 } else if (cc >= 'a' && cc <= 'z') { 810 m_temporaryBuffer.append(cc); 811 addToPossibleEndTag(cc); 812 } else { 813 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') { 814 if (isAppropriateEndTag()) { 815 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); 816 } 817 } else if (cc == '/') { 818 if (isAppropriateEndTag()) { 819 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); 820 } 821 } else if (cc == '>') { 822 if (isAppropriateEndTag()) { 823 FLUSH_EMIT_AND_RESUME_IN(DataState); 824 } 825 } 826 emitCharacter('<'); 827 
emitCharacter('/'); 828 m_token->appendToCharacter(m_temporaryBuffer); 829 m_bufferedEndTagName.clear(); 830 RECONSUME_IN(ScriptDataEscapedState); 831 } 832 break; 833 } 834 END_STATE() 835 836 BEGIN_STATE(ScriptDataDoubleEscapeStartState) { 837 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '/' || cc == '>') { 838 emitCharacter(cc); 839 if (temporaryBufferIs(scriptTag.localName())) 840 ADVANCE_TO(ScriptDataDoubleEscapedState); 841 else 842 ADVANCE_TO(ScriptDataEscapedState); 843 } else if (cc >= 'A' && cc <= 'Z') { 844 emitCharacter(cc); 845 m_temporaryBuffer.append(toLowerCase(cc)); 846 } else if (cc >= 'a' && cc <= 'z') { 847 emitCharacter(cc); 848 m_temporaryBuffer.append(cc); 849 } else { 850 RECONSUME_IN(ScriptDataEscapedState); 851 } 852 break; 853 } 854 END_STATE() 855 856 BEGIN_STATE(ScriptDataDoubleEscapedState) { 857 if (cc == '-') { 858 emitCharacter(cc); 859 ADVANCE_TO(ScriptDataDoubleEscapedDashState); 860 } else if (cc == '<') { 861 emitCharacter(cc); 862 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); 863 } else 864 emitCharacter(cc); 865 // FIXME: Handle EOF properly. 866 break; 867 } 868 END_STATE() 869 870 BEGIN_STATE(ScriptDataDoubleEscapedDashState) { 871 if (cc == '-') { 872 emitCharacter(cc); 873 ADVANCE_TO(ScriptDataDoubleEscapedDashDashState); 874 } else if (cc == '<') { 875 emitCharacter(cc); 876 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); 877 } else { 878 emitCharacter(cc); 879 ADVANCE_TO(ScriptDataDoubleEscapedState); 880 } 881 // FIXME: Handle EOF properly. 882 break; 883 } 884 END_STATE() 885 886 BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) { 887 if (cc == '-') 888 emitCharacter(cc); 889 else if (cc == '<') { 890 emitCharacter(cc); 891 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); 892 } else if (cc == '>') { 893 emitCharacter(cc); 894 ADVANCE_TO(ScriptDataState); 895 } else { 896 emitCharacter(cc); 897 ADVANCE_TO(ScriptDataDoubleEscapedState); 898 } 899 // FIXME: Handle EOF properly. 
900 break; 901 } 902 END_STATE() 903 904 BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) { 905 if (cc == '/') { 906 emitCharacter(cc); 907 m_temporaryBuffer.clear(); 908 ADVANCE_TO(ScriptDataDoubleEscapeEndState); 909 } else { 910 RECONSUME_IN(ScriptDataDoubleEscapedState); 911 } 912 break; 913 } 914 END_STATE() 915 916 BEGIN_STATE(ScriptDataDoubleEscapeEndState) { 917 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '/' || cc == '>') { 918 emitCharacter(cc); 919 if (temporaryBufferIs(scriptTag.localName())) 920 ADVANCE_TO(ScriptDataEscapedState); 921 else 922 ADVANCE_TO(ScriptDataDoubleEscapedState); 923 } else if (cc >= 'A' && cc <= 'Z') { 924 emitCharacter(cc); 925 m_temporaryBuffer.append(toLowerCase(cc)); 926 } else if (cc >= 'a' && cc <= 'z') { 927 emitCharacter(cc); 928 m_temporaryBuffer.append(cc); 929 } else { 930 RECONSUME_IN(ScriptDataDoubleEscapedState); 931 } 932 break; 933 } 934 END_STATE() 935 936 BEGIN_STATE(BeforeAttributeNameState) { 937 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 938 break; 939 else if (cc == '/') 940 ADVANCE_TO(SelfClosingStartTagState); 941 else if (cc == '>') { 942 EMIT_AND_RESUME_IN(DataState); 943 } else if (cc >= 'A' && cc <= 'Z') { 944 m_token->addNewAttribute(); 945 m_token->appendToAttributeName(toLowerCase(cc)); 946 ADVANCE_TO(AttributeNameState); 947 } else { 948 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') 949 emitParseError(); 950 m_token->addNewAttribute(); 951 m_token->appendToAttributeName(cc); 952 ADVANCE_TO(AttributeNameState); 953 } 954 // FIXME: Handle EOF properly. 
955 break; 956 } 957 END_STATE() 958 959 BEGIN_STATE(AttributeNameState) { 960 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 961 ADVANCE_TO(AfterAttributeNameState); 962 else if (cc == '/') 963 ADVANCE_TO(SelfClosingStartTagState); 964 else if (cc == '=') 965 ADVANCE_TO(BeforeAttributeValueState); 966 else if (cc == '>') { 967 EMIT_AND_RESUME_IN(DataState); 968 } else if (cc >= 'A' && cc <= 'Z') 969 m_token->appendToAttributeName(toLowerCase(cc)); 970 else { 971 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') 972 emitParseError(); 973 m_token->appendToAttributeName(cc); 974 ADVANCE_TO(AttributeNameState); 975 } 976 // FIXME: Handle EOF properly. 977 break; 978 } 979 END_STATE() 980 981 BEGIN_STATE(AfterAttributeNameState) { 982 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 983 break; 984 else if (cc == '/') 985 ADVANCE_TO(SelfClosingStartTagState); 986 else if (cc == '=') 987 ADVANCE_TO(BeforeAttributeValueState); 988 else if (cc == '=') { 989 EMIT_AND_RESUME_IN(DataState); 990 } else if (cc >= 'A' && cc <= 'Z') { 991 m_token->addNewAttribute(); 992 m_token->appendToAttributeName(toLowerCase(cc)); 993 ADVANCE_TO(AttributeNameState); 994 } else { 995 if (cc == '"' || cc == '\'' || cc == '<') 996 emitParseError(); 997 m_token->addNewAttribute(); 998 m_token->appendToAttributeName(cc); 999 ADVANCE_TO(AttributeNameState); 1000 } 1001 // FIXME: Handle EOF properly. 
1002 break; 1003 } 1004 END_STATE() 1005 1006 BEGIN_STATE(BeforeAttributeValueState) { 1007 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1008 break; 1009 else if (cc == '"') 1010 ADVANCE_TO(AttributeValueDoubleQuotedState); 1011 else if (cc == '&') { 1012 RECONSUME_IN(AttributeValueUnquotedState); 1013 } else if (cc == '\'') 1014 ADVANCE_TO(AttributeValueSingleQuotedState); 1015 else if (cc == '>') { 1016 emitParseError(); 1017 EMIT_AND_RESUME_IN(DataState); 1018 } else { 1019 if (cc == '<' || cc == '=' || cc == '`') 1020 emitParseError(); 1021 m_token->appendToAttributeValue(cc); 1022 ADVANCE_TO(AttributeValueUnquotedState); 1023 } 1024 break; 1025 } 1026 END_STATE() 1027 1028 BEGIN_STATE(AttributeValueDoubleQuotedState) { 1029 if (cc == '"') 1030 ADVANCE_TO(AfterAttributeValueQuotedState); 1031 else if (cc == '&') { 1032 m_additionalAllowedCharacter = '"'; 1033 ADVANCE_TO(CharacterReferenceInAttributeValueState); 1034 } else 1035 m_token->appendToAttributeValue(cc); 1036 // FIXME: Handle EOF properly. 1037 break; 1038 } 1039 END_STATE() 1040 1041 BEGIN_STATE(AttributeValueSingleQuotedState) { 1042 if (cc == '\'') 1043 ADVANCE_TO(AfterAttributeValueQuotedState); 1044 else if (cc == '&') { 1045 m_additionalAllowedCharacter = '\''; 1046 ADVANCE_TO(CharacterReferenceInAttributeValueState); 1047 } else 1048 m_token->appendToAttributeValue(cc); 1049 // FIXME: Handle EOF properly. 
1050 break; 1051 } 1052 END_STATE() 1053 1054 BEGIN_STATE(AttributeValueUnquotedState) { 1055 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1056 ADVANCE_TO(BeforeAttributeNameState); 1057 else if (cc == '&') { 1058 m_additionalAllowedCharacter = '>'; 1059 ADVANCE_TO(CharacterReferenceInAttributeValueState); 1060 } else if (cc == '>') { 1061 EMIT_AND_RESUME_IN(DataState); 1062 } else { 1063 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`') 1064 emitParseError(); 1065 m_token->appendToAttributeValue(cc); 1066 } 1067 // FIXME: Handle EOF properly. 1068 break; 1069 } 1070 END_STATE() 1071 1072 BEGIN_STATE(CharacterReferenceInAttributeValueState) { 1073 bool notEnoughCharacters = false; 1074 unsigned value = consumeEntity(source, notEnoughCharacters); 1075 if (notEnoughCharacters) 1076 return shouldEmitBufferedCharacterToken(source); 1077 if (!value) 1078 m_token->appendToAttributeValue('&'); 1079 else if (value < 0xFFFF) 1080 m_token->appendToAttributeValue(value); 1081 else { 1082 m_token->appendToAttributeValue(U16_LEAD(value)); 1083 m_token->appendToAttributeValue(U16_TRAIL(value)); 1084 } 1085 // We're supposed to switch back to the attribute value state that 1086 // we were in when we were switched into this state. Rather than 1087 // keeping track of this explictly, we observe that the previous 1088 // state can be determined by m_additionalAllowedCharacter. 
1089 if (m_additionalAllowedCharacter == '"') 1090 RECONSUME_IN(AttributeValueDoubleQuotedState) 1091 else if (m_additionalAllowedCharacter == '\'') 1092 RECONSUME_IN(AttributeValueSingleQuotedState) 1093 else if (m_additionalAllowedCharacter == '>') 1094 RECONSUME_IN(AttributeValueUnquotedState) 1095 else 1096 ASSERT_NOT_REACHED(); 1097 break; 1098 } 1099 END_STATE() 1100 1101 BEGIN_STATE(AfterAttributeValueQuotedState) { 1102 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1103 ADVANCE_TO(BeforeAttributeNameState); 1104 else if (cc == '/') 1105 ADVANCE_TO(SelfClosingStartTagState); 1106 else if (cc == '>') { 1107 EMIT_AND_RESUME_IN(DataState); 1108 } else { 1109 emitParseError(); 1110 RECONSUME_IN(BeforeAttributeNameState); 1111 } 1112 // FIXME: Handle EOF properly. 1113 break; 1114 } 1115 END_STATE() 1116 1117 BEGIN_STATE(SelfClosingStartTagState) { 1118 if (cc == '>') { 1119 notImplemented(); 1120 EMIT_AND_RESUME_IN(DataState); 1121 } else { 1122 emitParseError(); 1123 RECONSUME_IN(BeforeAttributeNameState); 1124 } 1125 // FIXME: Handle EOF properly. 1126 break; 1127 } 1128 END_STATE() 1129 1130 BEGIN_STATE(BogusCommentState) { 1131 m_token->beginComment(); 1132 while (!source.isEmpty()) { 1133 cc = *source; 1134 if (cc == '>') 1135 break; 1136 m_token->appendToComment(cc); 1137 source.advance(m_lineNumber); 1138 } 1139 EMIT_AND_RESUME_IN(DataState); 1140 if (source.isEmpty()) 1141 return true; 1142 // FIXME: Handle EOF properly. 
1143 break; 1144 } 1145 END_STATE() 1146 1147 BEGIN_STATE(MarkupDeclarationOpenState) { 1148 DEFINE_STATIC_LOCAL(String, dashDashString, ("--")); 1149 DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype")); 1150 if (cc == '-') { 1151 SegmentedString::LookAheadResult result = source.lookAhead(dashDashString); 1345 if (cc == '>') { 1346 EMIT_AND_RESUME_IN(DataState); 1347 } else { 1348 DEFINE_STATIC_LOCAL(String, publicString, ("public")); 1349 DEFINE_STATIC_LOCAL(String, systemString, ("system")); 1350 if (cc == 'P' || cc == 'p') { 1351 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString); 1152 1352 if (result == SegmentedString::DidMatch) { 1153 source.advanceAndASSERT('-'); 1154 source.advanceAndASSERT('-'); 1155 m_token->beginComment(); 1156 RECONSUME_IN(CommentStartState); 1353 advanceStringAndASSERTIgnoringCase(source, "public"); 1354 RECONSUME_IN(AfterDOCTYPEPublicKeywordState); 1157 1355 } else if (result == SegmentedString::NotEnoughCharacters) 1158 1356 return shouldEmitBufferedCharacterToken(source); 1159 } else if (cc == ' D' || cc == 'd') {1160 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase( doctypeString);1357 } else if (cc == 'S' || cc == 's') { 1358 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString); 1161 1359 if (result == SegmentedString::DidMatch) { 1162 advanceStringAndASSERTIgnoringCase(source, " doctype");1163 RECONSUME_IN( DOCTYPEState);1360 advanceStringAndASSERTIgnoringCase(source, "system"); 1361 RECONSUME_IN(AfterDOCTYPESystemKeywordState); 1164 1362 } else if (result == SegmentedString::NotEnoughCharacters) 1165 1363 return shouldEmitBufferedCharacterToken(source); 1166 1364 } 1365 emitParseError(); 1167 1366 notImplemented(); 1168 // FIXME: We're still missing the bits about the insertion mode being in foreign content: 1169 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#markup-declaration-open-state 1170 
emitParseError(); 1171 RECONSUME_IN(BogusCommentState); 1172 } 1173 END_STATE() 1174 1175 BEGIN_STATE(CommentStartState) { 1176 if (cc == '-') 1177 ADVANCE_TO(CommentStartDashState); 1178 else if (cc == '>') { 1179 emitParseError(); 1180 EMIT_AND_RESUME_IN(DataState); 1181 } else { 1182 m_token->appendToComment(cc); 1183 ADVANCE_TO(CommentState); 1184 } 1185 // FIXME: Handle EOF properly. 1367 ADVANCE_TO(BogusDOCTYPEState); 1368 } 1369 // FIXME: Handle EOF properly. 1370 break; 1371 } 1372 END_STATE() 1373 1374 BEGIN_STATE(AfterDOCTYPEPublicKeywordState) { 1375 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1376 ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); 1377 else if (cc == '"') { 1378 emitParseError(); 1379 m_token->setPublicIdentifierToEmptyString(); 1380 ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); 1381 } else if (cc == '\'') { 1382 emitParseError(); 1383 m_token->setPublicIdentifierToEmptyString(); 1384 ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); 1385 } else if (cc == '>') { 1386 emitParseError(); 1387 notImplemented(); 1388 EMIT_AND_RESUME_IN(DataState); 1389 } else { 1390 emitParseError(); 1391 notImplemented(); 1392 ADVANCE_TO(BogusDOCTYPEState); 1393 } 1394 // FIXME: Handle EOF properly. 1395 break; 1396 } 1397 END_STATE() 1398 1399 BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) { 1400 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1186 1401 break; 1187 } 1188 END_STATE() 1189 1190 BEGIN_STATE(CommentStartDashState) { 1191 if (cc == '-') 1192 ADVANCE_TO(CommentEndState); 1193 else if (cc == '>') { 1194 emitParseError(); 1195 EMIT_AND_RESUME_IN(DataState); 1196 } else { 1197 m_token->appendToComment('-'); 1198 m_token->appendToComment(cc); 1199 ADVANCE_TO(CommentState); 1200 } 1201 // FIXME: Handle EOF properly. 
1402 else if (cc == '"') { 1403 m_token->setPublicIdentifierToEmptyString(); 1404 ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); 1405 } else if (cc == '\'') { 1406 m_token->setPublicIdentifierToEmptyString(); 1407 ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); 1408 } else if (cc == '>') { 1409 emitParseError(); 1410 notImplemented(); 1411 EMIT_AND_RESUME_IN(DataState); 1412 } else { 1413 emitParseError(); 1414 notImplemented(); 1415 ADVANCE_TO(BogusDOCTYPEState); 1416 } 1417 // FIXME: Handle EOF properly. 1418 break; 1419 } 1420 END_STATE() 1421 1422 BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) { 1423 if (cc == '"') 1424 ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); 1425 else if (cc == '>') { 1426 emitParseError(); 1427 notImplemented(); 1428 EMIT_AND_RESUME_IN(DataState); 1429 } else 1430 m_token->appendToPublicIdentifier(cc); 1431 // FIXME: Handle EOF properly. 1432 break; 1433 } 1434 END_STATE() 1435 1436 BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) { 1437 if (cc == '\'') 1438 ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); 1439 else if (cc == '>') { 1440 emitParseError(); 1441 notImplemented(); 1442 EMIT_AND_RESUME_IN(DataState); 1443 } else 1444 m_token->appendToPublicIdentifier(cc); 1445 // FIXME: Handle EOF properly. 
1446 break; 1447 } 1448 END_STATE() 1449 1450 BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) { 1451 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1452 ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); 1453 else if (cc == '>') { 1454 EMIT_AND_RESUME_IN(DataState); 1455 } else if (cc == '"') { 1456 emitParseError(); 1457 m_token->setPublicIdentifierToEmptyString(); 1458 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); 1459 } else if (cc == '\'') { 1460 emitParseError(); 1461 m_token->setPublicIdentifierToEmptyString(); 1462 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1463 } else { 1464 emitParseError(); 1465 notImplemented(); 1466 ADVANCE_TO(BogusDOCTYPEState); 1467 } 1468 // FIXME: Handle EOF properly. 1469 break; 1470 } 1471 END_STATE() 1472 1473 BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) { 1474 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1475 ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); 1476 else if (cc == '>') { 1477 EMIT_AND_RESUME_IN(DataState); 1478 } else if (cc == '"') { 1479 m_token->setSystemIdentifierToEmptyString(); 1480 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); 1481 } else if (cc == '\'') { 1482 m_token->setSystemIdentifierToEmptyString(); 1483 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1484 } else { 1485 emitParseError(); 1486 notImplemented(); 1487 ADVANCE_TO(BogusDOCTYPEState); 1488 } 1489 // FIXME: Handle EOF properly. 
1490 break; 1491 } 1492 END_STATE() 1493 1494 BEGIN_STATE(AfterDOCTYPESystemKeywordState) { 1495 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1496 ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); 1497 else if (cc == '"') { 1498 emitParseError(); 1499 m_token->setSystemIdentifierToEmptyString(); 1500 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); 1501 } else if (cc == '\'') { 1502 emitParseError(); 1503 m_token->setSystemIdentifierToEmptyString(); 1504 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1505 } else if (cc == '>') { 1506 emitParseError(); 1507 notImplemented(); 1508 EMIT_AND_RESUME_IN(DataState); 1509 } else { 1510 emitParseError(); 1511 notImplemented(); 1512 ADVANCE_TO(BogusDOCTYPEState); 1513 } 1514 // FIXME: Handle EOF properly. 1515 break; 1516 } 1517 END_STATE() 1518 1519 BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) { 1520 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1202 1521 break; 1203 } 1204 END_STATE() 1205 1206 BEGIN_STATE(CommentState) { 1207 if (cc == '-') 1208 ADVANCE_TO(CommentEndDashState); 1209 else 1210 m_token->appendToComment(cc); 1211 // FIXME: Handle EOF properly. 1522 if (cc == '"') { 1523 m_token->setSystemIdentifierToEmptyString(); 1524 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); 1525 } else if (cc == '\'') { 1526 m_token->setSystemIdentifierToEmptyString(); 1527 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1528 } else if (cc == '>') { 1529 emitParseError(); 1530 notImplemented(); 1531 EMIT_AND_RESUME_IN(DataState); 1532 } else { 1533 emitParseError(); 1534 notImplemented(); 1535 ADVANCE_TO(BogusDOCTYPEState); 1536 } 1537 // FIXME: Handle EOF properly. 
1538 break; 1539 } 1540 END_STATE() 1541 1542 BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) { 1543 if (cc == '"') 1544 ADVANCE_TO(AfterDOCTYPESystemIdentifierState); 1545 else if (cc == '>') { 1546 emitParseError(); 1547 notImplemented(); 1548 EMIT_AND_RESUME_IN(DataState); 1549 } else 1550 m_token->appendToSystemIdentifier(cc); 1551 // FIXME: Handle EOF properly. 1552 break; 1553 } 1554 END_STATE() 1555 1556 BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) { 1557 if (cc == '\'') 1558 ADVANCE_TO(AfterDOCTYPESystemIdentifierState); 1559 else if (cc == '>') { 1560 emitParseError(); 1561 notImplemented(); 1562 EMIT_AND_RESUME_IN(DataState); 1563 } else 1564 m_token->appendToSystemIdentifier(cc); 1565 // FIXME: Handle EOF properly. 1566 break; 1567 } 1568 END_STATE() 1569 1570 BEGIN_STATE(AfterDOCTYPESystemIdentifierState) { 1571 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1212 1572 break; 1213 } 1214 END_STATE() 1215 1216 BEGIN_STATE(CommentEndDashState) { 1217 if (cc == '-') 1218 ADVANCE_TO(CommentEndState); 1219 else { 1220 m_token->appendToComment('-'); 1221 m_token->appendToComment(cc); 1222 ADVANCE_TO(CommentState); 1223 } 1224 // FIXME: Handle EOF properly. 
1225 break; 1226 } 1227 END_STATE() 1228 1229 BEGIN_STATE(CommentEndState) { 1230 if (cc == '>') { 1231 EMIT_AND_RESUME_IN(DataState); 1232 } else if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') { 1233 emitParseError(); 1234 m_token->appendToComment('-'); 1235 m_token->appendToComment('-'); 1236 m_token->appendToComment(cc); 1237 ADVANCE_TO(CommentEndSpaceState); 1238 } else if (cc == '!') { 1239 emitParseError(); 1240 ADVANCE_TO(CommentEndBangState); 1241 } else if (cc == '-') { 1242 emitParseError(); 1243 m_token->appendToComment('-'); 1244 m_token->appendToComment(cc); 1245 } else { 1246 emitParseError(); 1247 m_token->appendToComment('-'); 1248 m_token->appendToComment('-'); 1249 m_token->appendToComment(cc); 1250 ADVANCE_TO(CommentState); 1251 } 1252 // FIXME: Handle EOF properly. 1253 break; 1254 } 1255 END_STATE() 1256 1257 BEGIN_STATE(CommentEndBangState) { 1258 if (cc == '-') { 1259 m_token->appendToComment('-'); 1260 m_token->appendToComment('-'); 1261 m_token->appendToComment('!'); 1262 ADVANCE_TO(CommentEndDashState); 1263 } else if (cc == '>') { 1264 EMIT_AND_RESUME_IN(DataState); 1265 } else { 1266 m_token->appendToComment('-'); 1267 m_token->appendToComment('-'); 1268 m_token->appendToComment('!'); 1269 m_token->appendToComment(cc); 1270 ADVANCE_TO(CommentState); 1271 } 1272 // FIXME: Handle EOF properly. 1273 break; 1274 } 1275 END_STATE() 1276 1277 BEGIN_STATE(CommentEndSpaceState) { 1278 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1279 m_token->appendToComment(cc); 1280 else if (cc == '-') 1281 ADVANCE_TO(CommentEndDashState); 1282 else if (cc == '>') { 1283 EMIT_AND_RESUME_IN(DataState); 1284 } else { 1285 m_token->appendToComment(cc); 1286 ADVANCE_TO(CommentState); 1287 } 1288 // FIXME: Handle EOF properly. 
1289 break; 1290 } 1291 END_STATE() 1292 1293 BEGIN_STATE(DOCTYPEState) { 1294 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1295 ADVANCE_TO(BeforeDOCTYPENameState); 1296 else { 1297 emitParseError(); 1298 RECONSUME_IN(BeforeDOCTYPENameState); 1299 } 1300 // FIXME: Handle EOF properly. 1301 break; 1302 } 1303 END_STATE() 1304 1305 BEGIN_STATE(BeforeDOCTYPENameState) { 1306 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1307 break; 1308 else if (cc >= 'A' && cc <= 'Z') { 1309 m_token->beginDOCTYPE(toLowerCase(cc)); 1310 ADVANCE_TO(DOCTYPENameState); 1311 } else if (cc == '>') { 1312 emitParseError(); 1313 m_token->beginDOCTYPE(); 1314 notImplemented(); 1315 EMIT_AND_RESUME_IN(DataState); 1316 } else { 1317 m_token->beginDOCTYPE(cc); 1318 ADVANCE_TO(DOCTYPENameState); 1319 } 1320 // FIXME: Handle EOF properly. 1321 break; 1322 } 1323 END_STATE() 1324 1325 BEGIN_STATE(DOCTYPENameState) { 1326 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1327 ADVANCE_TO(AfterDOCTYPENameState); 1328 else if (cc == '>') { 1329 EMIT_AND_RESUME_IN(DataState); 1330 } else if (cc >= 'A' && cc <= 'Z') 1331 m_token->appendToName(toLowerCase(cc)); 1332 else 1333 m_token->appendToName(cc); 1334 // FIXME: Handle EOF properly. 
1335 break; 1336 } 1337 END_STATE() 1338 1339 BEGIN_STATE(AfterDOCTYPENameState) { 1340 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1341 break; 1342 if (cc == '>') { 1343 EMIT_AND_RESUME_IN(DataState); 1344 } else { 1345 DEFINE_STATIC_LOCAL(String, publicString, ("public")); 1346 DEFINE_STATIC_LOCAL(String, systemString, ("system")); 1347 if (cc == 'P' || cc == 'p') { 1348 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString); 1349 if (result == SegmentedString::DidMatch) { 1350 advanceStringAndASSERTIgnoringCase(source, "public"); 1351 RECONSUME_IN(AfterDOCTYPEPublicKeywordState); 1352 } else if (result == SegmentedString::NotEnoughCharacters) 1353 return shouldEmitBufferedCharacterToken(source); 1354 } else if (cc == 'S' || cc == 's') { 1355 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString); 1356 if (result == SegmentedString::DidMatch) { 1357 advanceStringAndASSERTIgnoringCase(source, "system"); 1358 RECONSUME_IN(AfterDOCTYPESystemKeywordState); 1359 } else if (result == SegmentedString::NotEnoughCharacters) 1360 return shouldEmitBufferedCharacterToken(source); 1361 } 1362 emitParseError(); 1363 notImplemented(); 1364 ADVANCE_TO(BogusDOCTYPEState); 1365 } 1366 // FIXME: Handle EOF properly. 
1367 break; 1368 } 1369 END_STATE() 1370 1371 BEGIN_STATE(AfterDOCTYPEPublicKeywordState) { 1372 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1373 ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); 1374 else if (cc == '"') { 1375 emitParseError(); 1376 m_token->setPublicIdentifierToEmptyString(); 1377 ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); 1378 } else if (cc == '\'') { 1379 emitParseError(); 1380 m_token->setPublicIdentifierToEmptyString(); 1381 ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); 1382 } else if (cc == '>') { 1383 emitParseError(); 1384 notImplemented(); 1385 EMIT_AND_RESUME_IN(DataState); 1386 } else { 1387 emitParseError(); 1388 notImplemented(); 1389 ADVANCE_TO(BogusDOCTYPEState); 1390 } 1391 // FIXME: Handle EOF properly. 1392 break; 1393 } 1394 END_STATE() 1395 1396 BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) { 1397 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1398 break; 1399 else if (cc == '"') { 1400 m_token->setPublicIdentifierToEmptyString(); 1401 ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); 1402 } else if (cc == '\'') { 1403 m_token->setPublicIdentifierToEmptyString(); 1404 ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); 1405 } else if (cc == '>') { 1406 emitParseError(); 1407 notImplemented(); 1408 EMIT_AND_RESUME_IN(DataState); 1409 } else { 1410 emitParseError(); 1411 notImplemented(); 1412 ADVANCE_TO(BogusDOCTYPEState); 1413 } 1414 // FIXME: Handle EOF properly. 1415 break; 1416 } 1417 END_STATE() 1418 1419 BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) { 1420 if (cc == '"') 1421 ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); 1422 else if (cc == '>') { 1423 emitParseError(); 1424 notImplemented(); 1425 EMIT_AND_RESUME_IN(DataState); 1426 } else 1427 m_token->appendToPublicIdentifier(cc); 1428 // FIXME: Handle EOF properly. 
1429 break; 1430 } 1431 END_STATE() 1432 1433 BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) { 1434 if (cc == '\'') 1435 ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); 1436 else if (cc == '>') { 1437 emitParseError(); 1438 notImplemented(); 1439 EMIT_AND_RESUME_IN(DataState); 1440 } else 1441 m_token->appendToPublicIdentifier(cc); 1442 // FIXME: Handle EOF properly. 1443 break; 1444 } 1445 END_STATE() 1446 1447 BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) { 1448 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1449 ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); 1450 else if (cc == '>') { 1451 EMIT_AND_RESUME_IN(DataState); 1452 } else if (cc == '"') { 1453 emitParseError(); 1454 m_token->setPublicIdentifierToEmptyString(); 1455 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); 1456 } else if (cc == '\'') { 1457 emitParseError(); 1458 m_token->setPublicIdentifierToEmptyString(); 1459 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1460 } else { 1461 emitParseError(); 1462 notImplemented(); 1463 ADVANCE_TO(BogusDOCTYPEState); 1464 } 1465 // FIXME: Handle EOF properly. 1466 break; 1467 } 1468 END_STATE() 1469 1470 BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) { 1471 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1472 ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); 1473 else if (cc == '>') { 1474 EMIT_AND_RESUME_IN(DataState); 1475 } else if (cc == '"') { 1476 m_token->setSystemIdentifierToEmptyString(); 1477 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); 1478 } else if (cc == '\'') { 1479 m_token->setSystemIdentifierToEmptyString(); 1480 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1481 } else { 1482 emitParseError(); 1483 notImplemented(); 1484 ADVANCE_TO(BogusDOCTYPEState); 1485 } 1486 // FIXME: Handle EOF properly. 
1487 break; 1488 } 1489 END_STATE() 1490 1491 BEGIN_STATE(AfterDOCTYPESystemKeywordState) { 1492 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1493 ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); 1494 else if (cc == '"') { 1495 emitParseError(); 1496 m_token->setSystemIdentifierToEmptyString(); 1497 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); 1498 } else if (cc == '\'') { 1499 emitParseError(); 1500 m_token->setSystemIdentifierToEmptyString(); 1501 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1502 } else if (cc == '>') { 1503 emitParseError(); 1504 notImplemented(); 1505 EMIT_AND_RESUME_IN(DataState); 1506 } else { 1507 emitParseError(); 1508 notImplemented(); 1509 ADVANCE_TO(BogusDOCTYPEState); 1510 } 1511 // FIXME: Handle EOF properly. 1512 break; 1513 } 1514 END_STATE() 1515 1516 BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) { 1517 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1518 break; 1519 if (cc == '"') { 1520 m_token->setSystemIdentifierToEmptyString(); 1521 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); 1522 } else if (cc == '\'') { 1523 m_token->setSystemIdentifierToEmptyString(); 1524 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1525 } else if (cc == '>') { 1526 emitParseError(); 1527 notImplemented(); 1528 EMIT_AND_RESUME_IN(DataState); 1529 } else { 1530 emitParseError(); 1531 notImplemented(); 1532 ADVANCE_TO(BogusDOCTYPEState); 1533 } 1534 // FIXME: Handle EOF properly. 1535 break; 1536 } 1537 END_STATE() 1538 1539 BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) { 1540 if (cc == '"') 1541 ADVANCE_TO(AfterDOCTYPESystemIdentifierState); 1542 else if (cc == '>') { 1543 emitParseError(); 1544 notImplemented(); 1545 EMIT_AND_RESUME_IN(DataState); 1546 } else 1547 m_token->appendToSystemIdentifier(cc); 1548 // FIXME: Handle EOF properly. 
1549 break; 1550 } 1551 END_STATE() 1552 1553 BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) { 1554 if (cc == '\'') 1555 ADVANCE_TO(AfterDOCTYPESystemIdentifierState); 1556 else if (cc == '>') { 1557 emitParseError(); 1558 notImplemented(); 1559 EMIT_AND_RESUME_IN(DataState); 1560 } else 1561 m_token->appendToSystemIdentifier(cc); 1562 // FIXME: Handle EOF properly. 1563 break; 1564 } 1565 END_STATE() 1566 1567 BEGIN_STATE(AfterDOCTYPESystemIdentifierState) { 1568 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ') 1569 break; 1570 else if (cc == '>') { 1571 EMIT_AND_RESUME_IN(DataState); 1572 } else { 1573 emitParseError(); 1574 ADVANCE_TO(BogusDOCTYPEState); 1575 } 1576 // FIXME: Handle EOF properly. 1577 break; 1578 } 1579 END_STATE() 1580 1581 BEGIN_STATE(BogusDOCTYPEState) { 1582 if (cc == '>') { 1583 EMIT_AND_RESUME_IN(DataState); 1584 } 1585 // FIXME: Handle EOF properly. 1586 break; 1587 } 1588 END_STATE() 1589 1590 BEGIN_STATE(CDATASectionState) { 1591 notImplemented(); 1592 break; 1593 } 1594 END_STATE() 1595 1596 } 1597 source.advance(m_lineNumber); 1598 if (m_emitPending) { 1599 m_emitPending = false; 1600 return true; 1601 } 1602 } 1573 else if (cc == '>') { 1574 EMIT_AND_RESUME_IN(DataState); 1575 } else { 1576 emitParseError(); 1577 ADVANCE_TO(BogusDOCTYPEState); 1578 } 1579 // FIXME: Handle EOF properly. 1580 break; 1581 } 1582 END_STATE() 1583 1584 BEGIN_STATE(BogusDOCTYPEState) { 1585 if (cc == '>') { 1586 EMIT_AND_RESUME_IN(DataState); 1587 } 1588 // FIXME: Handle EOF properly. 1589 break; 1590 } 1591 END_STATE() 1592 1593 BEGIN_STATE(CDATASectionState) { 1594 notImplemented(); 1595 break; 1596 } 1597 END_STATE() 1598 1599 } 1600 source.advance(m_lineNumber); 1601 if (m_emitPending) { 1602 m_emitPending = false; 1603 return true; 1604 } 1605 1606 } // Matches the "while" above. 1607 1603 1608 // We've reached the end of the input stream. If we have a character 1604 1609 // token buffered, we should emit it.
Note: See TracChangeset
for help on using the changeset viewer.