Changeset 57925 in webkit


Ignore:
Timestamp:
Apr 20, 2010 3:05:43 PM (14 years ago)
Author:
oliver@apple.com
Message:

2010-04-20 Oliver Hunt <oliver@apple.com>

Reviewed by Gavin Barraclough.

Autogenerate yarr character tables
https://bugs.webkit.org/show_bug.cgi?id=37877

Use a python script to automatically generate character tables
for the builtin YARR character classes. This allows us to generate
actual tables as well, by using these tables we can both increase
performance of the check (for complex builtins) and reduce the actual
code size.

4-8% win on string-unpack-code, but lots of noise on other tests so
i'm only confident saying its a 1% win overall.

  • DerivedSources.make:
  • JavaScriptCore.xcodeproj/project.pbxproj:
  • assembler/AbstractMacroAssembler.h: (JSC::AbstractMacroAssembler::ExtendedAddress::ExtendedAddress):
  • assembler/MacroAssembler.h: (JSC::MacroAssembler::branchTest8):
  • assembler/MacroAssemblerX86Common.h: (JSC::MacroAssemblerX86Common::branchTest8):
  • assembler/MacroAssemblerX86_64.h: (JSC::MacroAssemblerX86_64::branchTest8):
  • assembler/X86Assembler.h: (JSC::X86Assembler::cmpb_im): (JSC::X86Assembler::testb_im):
  • bytecode/SamplingTool.cpp: (JSC::SamplingTool::dump):
  • create_regex_tables: Added.
  • yarr/RegexCompiler.cpp: (JSC::Yarr::CharacterClassConstructor::charClass):
  • yarr/RegexJIT.cpp: (JSC::Yarr::RegexGenerator::matchCharacterClass): (JSC::Yarr::RegexGenerator::generatePatternCharacterGreedy): (JSC::Yarr::RegexGenerator::generatePatternCharacterNonGreedy): (JSC::Yarr::RegexGenerator::generateCharacterClassGreedy):
  • yarr/RegexPattern.h: (JSC::Yarr::CharacterClassTable::create): (JSC::Yarr::CharacterClassTable::CharacterClassTable): (JSC::Yarr::CharacterClass::CharacterClass):
Location:
trunk/JavaScriptCore
Files:
1 added
13 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/ChangeLog

    r57917 r57925  
     12010-04-20  Oliver Hunt  <oliver@apple.com>
     2
     3        Reviewed by Gavin Barraclough.
     4
     5        Autogenerate yarr character tables
     6        https://bugs.webkit.org/show_bug.cgi?id=37877
     7
     8        Use a python script to automatically generate character tables
     9        for the builtin YARR character classes.  This allows us to generate
     10        actual tables as well, by using these tables we can both increase
     11        performance of the check (for complex builtins) and reduce the actual
     12        code size.
     13
     14        4-8% win on string-unpack-code, but lots of noise on other tests so
     15        i'm only confident saying its a 1% win overall.
     16
     17        * DerivedSources.make:
     18        * JavaScriptCore.xcodeproj/project.pbxproj:
     19        * assembler/AbstractMacroAssembler.h:
     20        (JSC::AbstractMacroAssembler::ExtendedAddress::ExtendedAddress):
     21        * assembler/MacroAssembler.h:
     22        (JSC::MacroAssembler::branchTest8):
     23        * assembler/MacroAssemblerX86Common.h:
     24        (JSC::MacroAssemblerX86Common::branchTest8):
     25        * assembler/MacroAssemblerX86_64.h:
     26        (JSC::MacroAssemblerX86_64::branchTest8):
     27        * assembler/X86Assembler.h:
     28        (JSC::X86Assembler::cmpb_im):
     29        (JSC::X86Assembler::testb_im):
     30        * bytecode/SamplingTool.cpp:
     31        (JSC::SamplingTool::dump):
     32        * create_regex_tables: Added.
     33        * yarr/RegexCompiler.cpp:
     34        (JSC::Yarr::CharacterClassConstructor::charClass):
     35        * yarr/RegexJIT.cpp:
     36        (JSC::Yarr::RegexGenerator::matchCharacterClass):
     37        (JSC::Yarr::RegexGenerator::generatePatternCharacterGreedy):
     38        (JSC::Yarr::RegexGenerator::generatePatternCharacterNonGreedy):
     39        (JSC::Yarr::RegexGenerator::generateCharacterClassGreedy):
     40        * yarr/RegexPattern.h:
     41        (JSC::Yarr::CharacterClassTable::create):
     42        (JSC::Yarr::CharacterClassTable::CharacterClassTable):
     43        (JSC::Yarr::CharacterClass::CharacterClass):
     44
    1452010-04-20  Gavin Barraclough  <barraclough@apple.com>
    246
  • trunk/JavaScriptCore/DerivedSources.make

    r44550 r57925  
    4949    StringPrototype.lut.h \
    5050    docs/bytecode.html \
     51    RegExpJitTables.h \
    5152#
    5253
     
    7576docs/bytecode.html: make-bytecode-docs.pl Interpreter.cpp
    7677        perl $^ $@
     78
     79#character tables for Yarr
     80RegExpJitTables.h: create_regex_tables
     81        python $^ > $@
  • trunk/JavaScriptCore/DerivedSources.pro

    r55083 r57925  
    9494addExtraCompiler(ctgen)
    9595
     96#GENERATOR: "RegExpJitTables.h": tables used by Yarr
     97retgen.output = $$JSC_GENERATED_SOURCES_DIR/RegExpJitTables.h
     98retgen.wkScript = $$PWD/create_regex_tables
     99retgen.input = retgen.wkScript
     100retgen.commands = python $$retgen.wkScript > ${QMAKE_FILE_OUT}
     101addExtraCompiler(retgen)
  • trunk/JavaScriptCore/JavaScriptCore.xcodeproj/project.pbxproj

    r57912 r57925  
    854854                96DD73780F9DA3100027FBCC /* VMTags.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = VMTags.h; sourceTree = "<group>"; };
    855855                97F6903A1169DF7F00A6BB46 /* Terminator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Terminator.h; sourceTree = "<group>"; };
     856                A718F61A11754A21002465A7 /* RegExpJitTables.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RegExpJitTables.h; sourceTree = "<group>"; };
     857                A718F8211178EB4B002465A7 /* create_regex_tables */ = {isa = PBXFileReference; explicitFileType = text.script.python; fileEncoding = 4; path = create_regex_tables; sourceTree = "<group>"; };
    856858                A72700770DAC605600E548D7 /* JSNotAnObject.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JSNotAnObject.h; sourceTree = "<group>"; };
    857859                A72700780DAC605600E548D7 /* JSNotAnObject.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = JSNotAnObject.cpp; sourceTree = "<group>"; };
     
    11141116                        isa = PBXGroup;
    11151117                        children = (
     1118                                A718F8211178EB4B002465A7 /* create_regex_tables */,
    11161119                                937B63CC09E766D200A671DD /* DerivedSources.make */,
    11171120                                F692A8540255597D01FF60F7 /* create_hash_table */,
     
    13051308                        isa = PBXGroup;
    13061309                        children = (
     1310                                A718F61A11754A21002465A7 /* RegExpJitTables.h */,
    13071311                                BC18C5230E16FC8A00B34460 /* ArrayPrototype.lut.h */,
    13081312                                65B174BE09D1000200820339 /* chartables.c */,
  • trunk/JavaScriptCore/assembler/AbstractMacroAssembler.h

    r55633 r57925  
    8282    };
    8383
     84    struct ExtendedAddress {
     85        explicit ExtendedAddress(RegisterID base, intptr_t offset = 0)
     86            : base(base)
     87            , offset(offset)
     88        {
     89        }
     90       
     91        RegisterID base;
     92        intptr_t offset;
     93    };
     94
    8495    // ImplicitAddress:
    8596    //
  • trunk/JavaScriptCore/assembler/MacroAssembler.h

    r55633 r57925  
    332332        return branchSub32(cond, imm, dest);
    333333    }
     334    using MacroAssemblerBase::branchTest8;
     335    Jump branchTest8(Condition cond, ExtendedAddress address, Imm32 mask = Imm32(-1))
     336    {
     337        return MacroAssemblerBase::branchTest8(cond, Address(address.base, address.offset), mask);
     338    }
    334339#endif
    335340
  • trunk/JavaScriptCore/assembler/MacroAssemblerX86Common.h

    r56348 r57925  
    730730        else
    731731            m_assembler.testb_im(mask.m_value, address.offset, address.base);
     732        return Jump(m_assembler.jCC(x86Condition(cond)));
     733    }
     734   
     735    Jump branchTest8(Condition cond, BaseIndex address, Imm32 mask = Imm32(-1))
     736    {
     737        ASSERT((cond == Zero) || (cond == NonZero));
     738        if (mask.m_value == -1)
     739            m_assembler.cmpb_im(0, address.offset, address.base, address.index, address.scale);
     740        else
     741            m_assembler.testb_im(mask.m_value, address.offset, address.base, address.index, address.scale);
    732742        return Jump(m_assembler.jCC(x86Condition(cond)));
    733743    }
  • trunk/JavaScriptCore/assembler/MacroAssemblerX86_64.h

    r55633 r57925  
    410410    }
    411411
     412    using MacroAssemblerX86Common::branchTest8;
     413    Jump branchTest8(Condition cond, ExtendedAddress address, Imm32 mask = Imm32(-1))
     414    {
     415        ImmPtr addr(reinterpret_cast<void*>(address.offset));
     416        MacroAssemblerX86Common::move(addr, scratchRegister);
     417        return MacroAssemblerX86Common::branchTest8(cond, BaseIndex(scratchRegister, address.base, TimesOne), mask);
     418    }
     419
    412420    Label loadPtrWithPatchToLEA(Address address, RegisterID dest)
    413421    {
  • trunk/JavaScriptCore/assembler/X86Assembler.h

    r55684 r57925  
    777777        m_formatter.immediate8(imm);
    778778    }
     779   
     780    void cmpb_im(int imm, int offset, RegisterID base, RegisterID index, int scale)
     781    {
     782        m_formatter.oneByteOp(OP_GROUP1_EbIb, GROUP1_OP_CMP, base, index, scale, offset);
     783        m_formatter.immediate8(imm);
     784    }
    779785
    780786    void cmpl_im(int imm, int offset, RegisterID base, RegisterID index, int scale)
     
    900906    {
    901907        m_formatter.oneByteOp(OP_GROUP3_EbIb, GROUP3_OP_TEST, base, offset);
     908        m_formatter.immediate8(imm);
     909    }
     910   
     911    void testb_im(int imm, int offset, RegisterID base, RegisterID index, int scale)
     912    {
     913        m_formatter.oneByteOp(OP_GROUP3_EbIb, GROUP3_OP_TEST, base, index, scale, offset);
    902914        m_formatter.immediate8(imm);
    903915    }
  • trunk/JavaScriptCore/bytecode/SamplingTool.cpp

    r52791 r57925  
    338338        if (blockPercent >= 1) {
    339339            //Instruction* code = codeBlock->instructions().begin();
    340             printf("#%d: %s:%d: %d / %lld (%.3f%%)\n", i + 1, record->m_executable->sourceURL().UTF8String().c_str(), codeBlock->lineNumberForBytecodeOffset(exec, 0), record->m_sampleCount, m_sampleCount, blockPercent);
     340            printf("#%d: %s:%d: %d / %lld (%.3f%%)\n", i + 1, record->m_executable->sourceURL().ascii(), codeBlock->lineNumberForBytecodeOffset(exec, 0), record->m_sampleCount, m_sampleCount, blockPercent);
    341341            if (i < 10) {
    342342                HashMap<unsigned,unsigned> lineCounts;
  • trunk/JavaScriptCore/yarr/RegexCompiler.cpp

    r57608 r57925  
    3737namespace JSC { namespace Yarr {
    3838
     39#include "RegExpJitTables.h"
     40
    3941class CharacterClassConstructor {
    4042public:
     
    142144    CharacterClass* charClass()
    143145    {
    144         CharacterClass* characterClass = new CharacterClass();
     146        CharacterClass* characterClass = new CharacterClass(0);
    145147
    146148        characterClass->m_matches.append(m_matches);
     
    234236};
    235237
    236 
    237 CharacterClass* newlineCreate()
    238 {
    239     CharacterClass* characterClass = new CharacterClass();
    240 
    241     characterClass->m_matches.append('\n');
    242     characterClass->m_matches.append('\r');
    243     characterClass->m_matchesUnicode.append(0x2028);
    244     characterClass->m_matchesUnicode.append(0x2029);
    245    
    246     return characterClass;
    247 }
    248 
    249 CharacterClass* digitsCreate()
    250 {
    251     CharacterClass* characterClass = new CharacterClass();
    252 
    253     characterClass->m_ranges.append(CharacterRange('0', '9'));
    254    
    255     return characterClass;
    256 }
    257 
    258 CharacterClass* spacesCreate()
    259 {
    260     CharacterClass* characterClass = new CharacterClass();
    261 
    262     characterClass->m_matches.append(' ');
    263     characterClass->m_ranges.append(CharacterRange('\t', '\r'));
    264     characterClass->m_matchesUnicode.append(0x00a0);
    265     characterClass->m_matchesUnicode.append(0x1680);
    266     characterClass->m_matchesUnicode.append(0x180e);
    267     characterClass->m_matchesUnicode.append(0x2028);
    268     characterClass->m_matchesUnicode.append(0x2029);
    269     characterClass->m_matchesUnicode.append(0x202f);
    270     characterClass->m_matchesUnicode.append(0x205f);
    271     characterClass->m_matchesUnicode.append(0x3000);
    272     characterClass->m_rangesUnicode.append(CharacterRange(0x2000, 0x200a));
    273    
    274     return characterClass;
    275 }
    276 
    277 CharacterClass* wordcharCreate()
    278 {
    279     CharacterClass* characterClass = new CharacterClass();
    280 
    281     characterClass->m_matches.append('_');
    282     characterClass->m_ranges.append(CharacterRange('0', '9'));
    283     characterClass->m_ranges.append(CharacterRange('A', 'Z'));
    284     characterClass->m_ranges.append(CharacterRange('a', 'z'));
    285    
    286     return characterClass;
    287 }
    288 
    289 CharacterClass* nondigitsCreate()
    290 {
    291     CharacterClass* characterClass = new CharacterClass();
    292 
    293     characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
    294     characterClass->m_ranges.append(CharacterRange('9' + 1, 0x7f));
    295     characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
    296    
    297     return characterClass;
    298 }
    299 
    300 CharacterClass* nonspacesCreate()
    301 {
    302     CharacterClass* characterClass = new CharacterClass();
    303 
    304     characterClass->m_ranges.append(CharacterRange(0, '\t' - 1));
    305     characterClass->m_ranges.append(CharacterRange('\r' + 1, ' ' - 1));
    306     characterClass->m_ranges.append(CharacterRange(' ' + 1, 0x7f));
    307     characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x009f));
    308     characterClass->m_rangesUnicode.append(CharacterRange(0x00a1, 0x167f));
    309     characterClass->m_rangesUnicode.append(CharacterRange(0x1681, 0x180d));
    310     characterClass->m_rangesUnicode.append(CharacterRange(0x180f, 0x1fff));
    311     characterClass->m_rangesUnicode.append(CharacterRange(0x200b, 0x2027));
    312     characterClass->m_rangesUnicode.append(CharacterRange(0x202a, 0x202e));
    313     characterClass->m_rangesUnicode.append(CharacterRange(0x2030, 0x205e));
    314     characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff));
    315     characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xffff));
    316    
    317     return characterClass;
    318 }
    319 
    320 CharacterClass* nonwordcharCreate()
    321 {
    322     CharacterClass* characterClass = new CharacterClass();
    323 
    324     characterClass->m_matches.append('`');
    325     characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
    326     characterClass->m_ranges.append(CharacterRange('9' + 1, 'A' - 1));
    327     characterClass->m_ranges.append(CharacterRange('Z' + 1, '_' - 1));
    328     characterClass->m_ranges.append(CharacterRange('z' + 1, 0x7f));
    329     characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
    330 
    331     return characterClass;
    332 }
    333 
    334 
    335238class RegexPatternConstructor {
    336239public:
  • trunk/JavaScriptCore/yarr/RegexJIT.cpp

    r57608 r57925  
    4141namespace JSC { namespace Yarr {
    4242
    43 
    4443class RegexGenerator : private MacroAssembler {
    4544    friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
     
    156155    void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
    157156    {
     157        if (charClass->m_table) {
     158            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
     159            matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));   
     160            return;
     161        }
    158162        Jump unicodeFail;
    159163        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
     
    610614            failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
    611615        }
     616
    612617        add32(Imm32(1), countRegister);
    613618        add32(Imm32(1), index);
    614         branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
     619        if (term.quantityCount != 0xffffffff)
     620            branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
     621        else
     622            jump(loop);
     623
    615624        failures.append(jump());
    616625
     
    647656
    648657        atEndOfInput().linkTo(hardFail, this);
    649         branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
     658        if (term.quantityCount != 0xffffffff)
     659            branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
    650660        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
    651661            readCharacter(state.inputOffset(), character);
     
    733743        add32(Imm32(1), countRegister);
    734744        add32(Imm32(1), index);
    735         branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
     745        if (term.quantityCount != 0xffffffff)
     746            branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
     747        else
     748            jump(loop);
     749
    736750        failures.append(jump());
    737751
  • trunk/JavaScriptCore/yarr/RegexPattern.h

    r57608 r57925  
    5757};
    5858
     59struct CharacterClassTable : RefCounted<CharacterClassTable> {
     60    const char* m_table;
     61    bool m_inverted;
     62    static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted)
     63    {
     64        return adoptRef(new CharacterClassTable(table, inverted));
     65    }
     66
     67private:
     68    CharacterClassTable(const char* table, bool inverted)
     69        : m_table(table)
     70        , m_inverted(inverted)
     71    {
     72    }
     73};
     74
    5975struct CharacterClass : FastAllocBase {
     76    // All CharacterClass instances have to have the full set of matches and ranges,
     77    // they may have an optional table for faster lookups (which must match the
     78    // specified matches and ranges)
     79    CharacterClass(PassRefPtr<CharacterClassTable> table)
     80        : m_table(table)
     81    {
     82    }
    6083    Vector<UChar> m_matches;
    6184    Vector<CharacterRange> m_ranges;
    6285    Vector<UChar> m_matchesUnicode;
    6386    Vector<CharacterRange> m_rangesUnicode;
     87    RefPtr<CharacterClassTable> m_table;
    6488};
    6589
Note: See TracChangeset for help on using the changeset viewer.