Changeset 272570 in webkit
- Timestamp:
- Feb 9, 2021 2:02:13 AM (18 months ago)
- Location:
- trunk/Source/JavaScriptCore
- Files:
-
- 4 edited
-
ChangeLog (modified) (1 diff)
-
parser/Lexer.cpp (modified) (2 diffs)
-
runtime/LiteralParser.cpp (modified) (6 diffs)
-
runtime/LiteralParser.h (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/JavaScriptCore/ChangeLog
r272566 r272570
1 2021-02-09 Yusuke Suzuki <ysuzuki@apple.com>
2
3 [JSC] Make JSON.parse faster by using table for fast string parsing
4 https://bugs.webkit.org/show_bug.cgi?id=221593
5
6 Reviewed by Ryosuke Niwa and Geoffrey Garen.
7
8 We use Latin1 table for quickly checking whether a character is safe for the fast path string parsing in JSON.
9 This offers 1-3% improvement in Kraken json-parse-financial test.
10
11 * parser/Lexer.cpp:
12 (JSC::Lexer<T>::Lexer):
13 * runtime/LiteralParser.cpp:
14 (JSC::LiteralParser<CharType>::Lexer::lex):
15 (JSC::isSafeStringCharacter):
16 (JSC::LiteralParser<CharType>::Lexer::lexString):
17 * runtime/LiteralParser.h:
18
1 19 2021-02-08 Patrick Angle <pangle@apple.com>
2 20
-
trunk/Source/JavaScriptCore/parser/Lexer.cpp
r270481 r272570 45 45 } 46 46 47 enum CharacterType {47 enum CharacterType : uint8_t { 48 48 // Types for the main switch 49 49 … … 96 96 97 97 // 256 Latin-1 codes 98 static constexpr const unsigned shorttypesOfLatin1Characters[256] = {98 static constexpr const CharacterType typesOfLatin1Characters[256] = { 99 99 /* 0 - Null */ CharacterInvalid, 100 100 /* 1 - Start of Heading */ CharacterInvalid, -
trunk/Source/JavaScriptCore/runtime/LiteralParser.cpp
r264379 r272570 174 174 175 175 // 256 Latin-1 codes 176 static constexpr const TokenType TokenTypesOfLatin1Characters[256] = {176 static constexpr const TokenType tokenTypesOfLatin1Characters[256] = { 177 177 /* 0 - Null */ TokError, 178 178 /* 1 - Start of Heading */ TokError, … … 433 433 }; 434 434 435 // 256 Latin-1 codes 436 static constexpr const bool safeStringLatin1CharactersInStrictJSON[256] = { 437 /* 0 - Null */ false, 438 /* 1 - Start of Heading */ false, 439 /* 2 - Start of Text */ false, 440 /* 3 - End of Text */ false, 441 /* 4 - End of Transm. */ false, 442 /* 5 - Enquiry */ false, 443 /* 6 - Acknowledgment */ false, 444 /* 7 - Bell */ false, 445 /* 8 - Back Space */ false, 446 /* 9 - Horizontal Tab */ false, 447 /* 10 - Line Feed */ false, 448 /* 11 - Vertical Tab */ false, 449 /* 12 - Form Feed */ false, 450 /* 13 - Carriage Return */ false, 451 /* 14 - Shift Out */ false, 452 /* 15 - Shift In */ false, 453 /* 16 - Data Line Escape */ false, 454 /* 17 - Device Control 1 */ false, 455 /* 18 - Device Control 2 */ false, 456 /* 19 - Device Control 3 */ false, 457 /* 20 - Device Control 4 */ false, 458 /* 21 - Negative Ack. */ false, 459 /* 22 - Synchronous Idle */ false, 460 /* 23 - End of Transmit */ false, 461 /* 24 - Cancel */ false, 462 /* 25 - End of Medium */ false, 463 /* 26 - Substitute */ false, 464 /* 27 - Escape */ false, 465 /* 28 - File Separator */ false, 466 /* 29 - Group Separator */ false, 467 /* 30 - Record Separator */ false, 468 /* 31 - Unit Separator */ false, 469 /* 32 - Space */ true, 470 /* 33 - ! */ true, 471 /* 34 - " */ false, 472 /* 35 - # */ true, 473 /* 36 - $ */ true, 474 /* 37 - % */ true, 475 /* 38 - & */ true, 476 /* 39 - ' */ true, 477 /* 40 - ( */ true, 478 /* 41 - ) */ true, 479 /* 42 - * */ true, 480 /* 43 - + */ true, 481 /* 44 - , */ true, 482 /* 45 - - */ true, 483 /* 46 - . 
*/ true, 484 /* 47 - / */ true, 485 /* 48 - 0 */ true, 486 /* 49 - 1 */ true, 487 /* 50 - 2 */ true, 488 /* 51 - 3 */ true, 489 /* 52 - 4 */ true, 490 /* 53 - 5 */ true, 491 /* 54 - 6 */ true, 492 /* 55 - 7 */ true, 493 /* 56 - 8 */ true, 494 /* 57 - 9 */ true, 495 /* 58 - : */ true, 496 /* 59 - ; */ true, 497 /* 60 - < */ true, 498 /* 61 - = */ true, 499 /* 62 - > */ true, 500 /* 63 - ? */ true, 501 /* 64 - @ */ true, 502 /* 65 - A */ true, 503 /* 66 - B */ true, 504 /* 67 - C */ true, 505 /* 68 - D */ true, 506 /* 69 - E */ true, 507 /* 70 - F */ true, 508 /* 71 - G */ true, 509 /* 72 - H */ true, 510 /* 73 - I */ true, 511 /* 74 - J */ true, 512 /* 75 - K */ true, 513 /* 76 - L */ true, 514 /* 77 - M */ true, 515 /* 78 - N */ true, 516 /* 79 - O */ true, 517 /* 80 - P */ true, 518 /* 81 - Q */ true, 519 /* 82 - R */ true, 520 /* 83 - S */ true, 521 /* 84 - T */ true, 522 /* 85 - U */ true, 523 /* 86 - V */ true, 524 /* 87 - W */ true, 525 /* 88 - X */ true, 526 /* 89 - Y */ true, 527 /* 90 - Z */ true, 528 /* 91 - [ */ true, 529 /* 92 - \ */ false, 530 /* 93 - ] */ true, 531 /* 94 - ^ */ true, 532 /* 95 - _ */ true, 533 /* 96 - ` */ true, 534 /* 97 - a */ true, 535 /* 98 - b */ true, 536 /* 99 - c */ true, 537 /* 100 - d */ true, 538 /* 101 - e */ true, 539 /* 102 - f */ true, 540 /* 103 - g */ true, 541 /* 104 - h */ true, 542 /* 105 - i */ true, 543 /* 106 - j */ true, 544 /* 107 - k */ true, 545 /* 108 - l */ true, 546 /* 109 - m */ true, 547 /* 110 - n */ true, 548 /* 111 - o */ true, 549 /* 112 - p */ true, 550 /* 113 - q */ true, 551 /* 114 - r */ true, 552 /* 115 - s */ true, 553 /* 116 - t */ true, 554 /* 117 - u */ true, 555 /* 118 - v */ true, 556 /* 119 - w */ true, 557 /* 120 - x */ true, 558 /* 121 - y */ true, 559 /* 122 - z */ true, 560 /* 123 - { */ true, 561 /* 124 - | */ true, 562 /* 125 - } */ true, 563 /* 126 - ~ */ true, 564 /* 127 - Delete */ true, 565 /* 128 - Cc category */ true, 566 /* 129 - Cc category */ true, 567 /* 130 - Cc category 
*/ true, 568 /* 131 - Cc category */ true, 569 /* 132 - Cc category */ true, 570 /* 133 - Cc category */ true, 571 /* 134 - Cc category */ true, 572 /* 135 - Cc category */ true, 573 /* 136 - Cc category */ true, 574 /* 137 - Cc category */ true, 575 /* 138 - Cc category */ true, 576 /* 139 - Cc category */ true, 577 /* 140 - Cc category */ true, 578 /* 141 - Cc category */ true, 579 /* 142 - Cc category */ true, 580 /* 143 - Cc category */ true, 581 /* 144 - Cc category */ true, 582 /* 145 - Cc category */ true, 583 /* 146 - Cc category */ true, 584 /* 147 - Cc category */ true, 585 /* 148 - Cc category */ true, 586 /* 149 - Cc category */ true, 587 /* 150 - Cc category */ true, 588 /* 151 - Cc category */ true, 589 /* 152 - Cc category */ true, 590 /* 153 - Cc category */ true, 591 /* 154 - Cc category */ true, 592 /* 155 - Cc category */ true, 593 /* 156 - Cc category */ true, 594 /* 157 - Cc category */ true, 595 /* 158 - Cc category */ true, 596 /* 159 - Cc category */ true, 597 /* 160 - Zs category (nbsp) */ true, 598 /* 161 - Po category */ true, 599 /* 162 - Sc category */ true, 600 /* 163 - Sc category */ true, 601 /* 164 - Sc category */ true, 602 /* 165 - Sc category */ true, 603 /* 166 - So category */ true, 604 /* 167 - So category */ true, 605 /* 168 - Sk category */ true, 606 /* 169 - So category */ true, 607 /* 170 - Ll category */ true, 608 /* 171 - Pi category */ true, 609 /* 172 - Sm category */ true, 610 /* 173 - Cf category */ true, 611 /* 174 - So category */ true, 612 /* 175 - Sk category */ true, 613 /* 176 - So category */ true, 614 /* 177 - Sm category */ true, 615 /* 178 - No category */ true, 616 /* 179 - No category */ true, 617 /* 180 - Sk category */ true, 618 /* 181 - Ll category */ true, 619 /* 182 - So category */ true, 620 /* 183 - Po category */ true, 621 /* 184 - Sk category */ true, 622 /* 185 - No category */ true, 623 /* 186 - Ll category */ true, 624 /* 187 - Pf category */ true, 625 /* 188 - No category */ true, 626 /* 189 
- No category */ true, 627 /* 190 - No category */ true, 628 /* 191 - Po category */ true, 629 /* 192 - Lu category */ true, 630 /* 193 - Lu category */ true, 631 /* 194 - Lu category */ true, 632 /* 195 - Lu category */ true, 633 /* 196 - Lu category */ true, 634 /* 197 - Lu category */ true, 635 /* 198 - Lu category */ true, 636 /* 199 - Lu category */ true, 637 /* 200 - Lu category */ true, 638 /* 201 - Lu category */ true, 639 /* 202 - Lu category */ true, 640 /* 203 - Lu category */ true, 641 /* 204 - Lu category */ true, 642 /* 205 - Lu category */ true, 643 /* 206 - Lu category */ true, 644 /* 207 - Lu category */ true, 645 /* 208 - Lu category */ true, 646 /* 209 - Lu category */ true, 647 /* 210 - Lu category */ true, 648 /* 211 - Lu category */ true, 649 /* 212 - Lu category */ true, 650 /* 213 - Lu category */ true, 651 /* 214 - Lu category */ true, 652 /* 215 - Sm category */ true, 653 /* 216 - Lu category */ true, 654 /* 217 - Lu category */ true, 655 /* 218 - Lu category */ true, 656 /* 219 - Lu category */ true, 657 /* 220 - Lu category */ true, 658 /* 221 - Lu category */ true, 659 /* 222 - Lu category */ true, 660 /* 223 - Ll category */ true, 661 /* 224 - Ll category */ true, 662 /* 225 - Ll category */ true, 663 /* 226 - Ll category */ true, 664 /* 227 - Ll category */ true, 665 /* 228 - Ll category */ true, 666 /* 229 - Ll category */ true, 667 /* 230 - Ll category */ true, 668 /* 231 - Ll category */ true, 669 /* 232 - Ll category */ true, 670 /* 233 - Ll category */ true, 671 /* 234 - Ll category */ true, 672 /* 235 - Ll category */ true, 673 /* 236 - Ll category */ true, 674 /* 237 - Ll category */ true, 675 /* 238 - Ll category */ true, 676 /* 239 - Ll category */ true, 677 /* 240 - Ll category */ true, 678 /* 241 - Ll category */ true, 679 /* 242 - Ll category */ true, 680 /* 243 - Ll category */ true, 681 /* 244 - Ll category */ true, 682 /* 245 - Ll category */ true, 683 /* 246 - Ll category */ true, 684 /* 247 - Sm category */ true, 685 
/* 248 - Ll category */ true, 686 /* 249 - Ll category */ true, 687 /* 250 - Ll category */ true, 688 /* 251 - Ll category */ true, 689 /* 252 - Ll category */ true, 690 /* 253 - Ll category */ true, 691 /* 254 - Ll category */ true, 692 /* 255 - Ll category */ true, 693 }; 694 435 695 template <typename CharType> 436 696 ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lex(LiteralParserToken<CharType>& token) … … 454 714 CharType character = *m_ptr; 455 715 if (LIKELY(isLatin1(character))) { 456 TokenType tokenType = TokenTypesOfLatin1Characters[character];716 TokenType tokenType = tokenTypesOfLatin1Characters[character]; 457 717 switch (tokenType) { 458 718 case TokString: 459 if ( character == '\'' && m_mode == StrictJSON) {719 if (UNLIKELY(character == '\'' && m_mode == StrictJSON)) { 460 720 m_lexErrorMessage = "Single quotes (\') are not allowed in JSON"_s; 461 721 return TokError; … … 573 833 static ALWAYS_INLINE bool isSafeStringCharacter(LChar c, LChar terminator) 574 834 { 575 return (c >= ' ' && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict); 835 if constexpr (set == SafeStringCharacterSet::Strict) 836 return safeStringLatin1CharactersInStrictJSON[c]; 837 else 838 return (c >= ' ' && c != '\\' && c != terminator) || (c == '\t'); 576 839 } 577 840 … … 579 842 static ALWAYS_INLINE bool isSafeStringCharacter(UChar c, UChar terminator) 580 843 { 581 return (c >= ' ' && (set == SafeStringCharacterSet::Strict || isLatin1(c)) && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict); 844 if constexpr (set == SafeStringCharacterSet::Strict) { 845 if (!isLatin1(c)) 846 return true; 847 return isSafeStringCharacter<set>(static_cast<LChar>(c), static_cast<LChar>(terminator)); 848 } else 849 return (c >= ' ' && isLatin1(c) && c != '\\' && c != terminator) || (c == '\t'); 582 850 } 583 851 … … 589 857 590 858 if (m_mode == StrictJSON) { 591 while (m_ptr < m_end && 
isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator)) 859 ASSERT(terminator == '"'); 860 while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, '"')) 592 861 ++m_ptr; 593 862 } else { -
trunk/Source/JavaScriptCore/runtime/LiteralParser.h
r254087 r272570 34 34 namespace JSC { 35 35 36 typedef enum { StrictJSON, NonStrictJSON, JSONP } ParserMode;37 38 enum JSONPPathEntryType {36 enum ParserMode : uint8_t { StrictJSON, NonStrictJSON, JSONP }; 37 38 enum JSONPPathEntryType : uint8_t { 39 39 JSONPPathEntryTypeDeclareVar, // var pathEntryName = JSON 40 40 JSONPPathEntryTypeDot, // <prior entries>.pathEntryName = JSON … … 43 43 }; 44 44 45 enum ParserState { StartParseObject, StartParseArray, StartParseExpression, 46 StartParseStatement, StartParseStatementEndStatement, 47 DoParseObjectStartExpression, DoParseObjectEndExpression, 48 DoParseArrayStartExpression, DoParseArrayEndExpression }; 49 enum TokenType { TokLBracket, TokRBracket, TokLBrace, TokRBrace, 50 TokString, TokIdentifier, TokNumber, TokColon, 51 TokLParen, TokRParen, TokComma, TokTrue, TokFalse, 52 TokNull, TokEnd, TokDot, TokAssign, TokSemi, TokError }; 53 45 enum ParserState : uint8_t { 46 StartParseObject, StartParseArray, StartParseExpression, 47 StartParseStatement, StartParseStatementEndStatement, 48 DoParseObjectStartExpression, DoParseObjectEndExpression, 49 DoParseArrayStartExpression, DoParseArrayEndExpression }; 50 51 enum TokenType : uint8_t { 52 TokLBracket, TokRBracket, TokLBrace, TokRBrace, 53 TokString, TokIdentifier, TokNumber, TokColon, 54 TokLParen, TokRParen, TokComma, TokTrue, TokFalse, 55 TokNull, TokEnd, TokDot, TokAssign, TokSemi, TokError }; 56 54 57 struct JSONPPathEntry { 55 JSONPPathEntryType m_type;56 58 Identifier m_pathEntryName; 57 59 int m_pathIndex; 60 JSONPPathEntryType m_type; 58 61 }; 59 62
Note: See TracChangeset for help on using the changeset viewer.