Changeset 254788 in webkit


Ignore:
Timestamp:
Jan 17, 2020 7:24:31 PM (4 years ago)
Author:
sbarati@apple.com
Message:

Air O0 should have better stack allocation
https://bugs.webkit.org/show_bug.cgi?id=206436

Reviewed by Tadeu Zagallo.

JSTests:

  • wasm/stress/dont-stack-overflow-in-air.js: Added.

Source/JavaScriptCore:

This patch adds a simple stack slot allocator to Air O0 to make code
use smaller stack frames. The huge stack frames from the old stack
allocator were leading to stack overflows in some programs. Before,
each Tmp got its own stack slot. The new allocator works similarly to O0's
register allocator. This stack allocator linearizes the program and uses live
range end as an opportunity to place the stack slot on a free list of
available stack slots. This patch also fixes an issue in our linearization code
where the head of a block and the tail of another block would share the
same linearization index. This didn't matter for register allocation, but
does matter for the stack allocator. So "live at head" and "live at tail"
now get their own linearization index.

  • b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp:

(JSC::B3::Air::GenerateAndAllocateRegisters::buildLiveRanges):
(JSC::B3::Air::GenerateAndAllocateRegisters::prepareForGeneration):
(JSC::B3::Air::GenerateAndAllocateRegisters::generate):

  • b3/air/AirAllocateRegistersAndStackAndGenerateCode.h:
  • b3/air/AirLiveness.h:
Location:
trunk
Files:
1 added
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/JSTests/ChangeLog

    r254783 r254788  
     12020-01-17  Saam Barati  <sbarati@apple.com>
     2
     3        Air O0 should have better stack allocation
     4        https://bugs.webkit.org/show_bug.cgi?id=206436
     5
     6        Reviewed by Tadeu Zagallo.
     7
     8        * wasm/stress/dont-stack-overflow-in-air.js: Added.
     9
    1102020-01-17  Mark Lam  <mark.lam@apple.com>
    211
  • trunk/Source/JavaScriptCore/ChangeLog

    r254787 r254788  
     12020-01-17  Saam Barati  <sbarati@apple.com>
     2
     3        Air O0 should have better stack allocation
     4        https://bugs.webkit.org/show_bug.cgi?id=206436
     5
     6        Reviewed by Tadeu Zagallo.
     7
     8        This patch adds a simple stack slot allocator to Air O0 to make code
     9        use smaller stack frames. The huge stack frames from the old stack
     10        allocator were leading to stack overflows in some programs. Before,
     11        each Tmp got its own stack slot. The new allocator works similarly to O0's
     12        register allocator. This stack allocator linearizes the program and uses live
     13        range end as an opportunity to place the stack slot on a free list of
     14        available stack slots. This patch also fixes an issue in our linearization code
     15        where the head of a block and the tail of another block would share the
     16        same linearization index. This didn't matter for register allocation, but
     17        does matter for the stack allocator. So "live at head" and "live at tail"
     18        now get their own linearization index.
     19
     20        * b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp:
     21        (JSC::B3::Air::GenerateAndAllocateRegisters::buildLiveRanges):
     22        (JSC::B3::Air::GenerateAndAllocateRegisters::prepareForGeneration):
     23        (JSC::B3::Air::GenerateAndAllocateRegisters::generate):
     24        * b3/air/AirAllocateRegistersAndStackAndGenerateCode.h:
     25        * b3/air/AirLiveness.h:
     26
    1272020-01-17  David Kilzer  <ddkilzer@apple.com>
    228
  • trunk/Source/JavaScriptCore/b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp

    r254087 r254788  
    8585                m_liveRangeEnd[tmp] = m_globalInstIndex;
    8686        }
     87        ++m_globalInstIndex;
    8788        for (Inst& inst : *block) {
    8889            inst.forEachTmpFast([&] (Tmp tmp) {
     
    9697                m_liveRangeEnd[tmp] = m_globalInstIndex;
    9798        }
     99        ++m_globalInstIndex;
    98100    }
    99101}
     
    281283    allocateEscapedStackSlots(m_code);
    282284
    283     // Each Tmp gets its own stack slot.
    284     auto createStackSlot = [&] (const Tmp& tmp) {
    285         TmpData data;
    286         data.spillSlot = m_code.addStackSlot(8, StackSlotKind::Spill);
    287         data.reg = Reg();
    288         m_map[tmp] = data;
     285    insertBlocksForFlushAfterTerminalPatchpoints();
     286
    289287#if ASSERT_ENABLED
    290         m_allTmps[tmp.bank()].append(tmp);
    291 #endif
    292     };
    293 
    294288    m_code.forEachTmp([&] (Tmp tmp) {
    295289        ASSERT(!tmp.isReg());
    296         createStackSlot(tmp);
     290        m_allTmps[tmp.bank()].append(tmp);
    297291    });
     292#endif
     293
     294    m_liveness = makeUnique<UnifiedTmpLiveness>(m_code);
     295
     296    {
     297        buildLiveRanges(*m_liveness);
     298
     299        Vector<StackSlot*, 16> freeSlots;
     300        Vector<StackSlot*, 4> toFree;
     301        m_globalInstIndex = 0;
     302        for (BasicBlock* block : m_code) {
     303            auto assignStackSlotToTmp = [&] (Tmp tmp) {
     304                if (tmp.isReg())
     305                    return;
     306
     307                TmpData& data = m_map[tmp];
     308                if (data.spillSlot) {
     309                    if (m_liveRangeEnd[tmp] == m_globalInstIndex)
     310                        toFree.append(data.spillSlot);
     311                    return;
     312                }
     313
     314                if (freeSlots.size())
     315                    data.spillSlot = freeSlots.takeLast();
     316                else
     317                    data.spillSlot = m_code.addStackSlot(8, StackSlotKind::Spill);
     318                data.reg = Reg();
     319            };
     320
     321            auto flushToFreeList = [&] {
     322                for (auto* stackSlot : toFree)
     323                    freeSlots.append(stackSlot);
     324                toFree.clear();
     325            };
     326
     327            for (Tmp tmp : m_liveness->liveAtHead(block))
     328                assignStackSlotToTmp(tmp);
     329            flushToFreeList();
     330
     331            ++m_globalInstIndex;
     332
     333            for (Inst& inst : *block) {
     334                Vector<Tmp, 4> seenTmps;
     335                inst.forEachTmpFast([&] (Tmp tmp) {
     336                    if (seenTmps.contains(tmp))
     337                        return;
     338                    seenTmps.append(tmp);
     339                    assignStackSlotToTmp(tmp);
     340                });
     341
     342                flushToFreeList();
     343                ++m_globalInstIndex;
     344            }
     345
     346            for (Tmp tmp : m_liveness->liveAtTail(block))
     347                assignStackSlotToTmp(tmp);
     348            flushToFreeList();
     349
     350            ++m_globalInstIndex;
     351        }
     352    }
    298353
    299354    m_allowedRegisters = RegisterSet();
     
    304359        for (Reg reg : m_registers[bank]) {
    305360            m_allowedRegisters.set(reg);
    306             createStackSlot(Tmp(reg));
     361            TmpData& data = m_map[Tmp(reg)];
     362            data.spillSlot = m_code.addStackSlot(8, StackSlotKind::Spill);
     363            data.reg = Reg();
    307364        }
    308365    });
     
    324381    lowerStackArgs(m_code);
    325382
     383#if ASSERT_ENABLED
    326384    // Verify none of these passes add any tmps.
    327 #if ASSERT_ENABLED
    328385    forEachBank([&] (Bank bank) {
    329         ASSERT(m_allTmps[bank].size() - m_registers[bank].size() == m_code.numTmps(bank));
     386        ASSERT(m_allTmps[bank].size() == m_code.numTmps(bank));
    330387    });
     388
     389    {
     390        // Verify that lowerStackArgs didn't change Tmp liveness at the boundaries for the Tmps and Registers we model.
     391        UnifiedTmpLiveness liveness(m_code);
     392        for (BasicBlock* block : m_code) {
     393            auto assertLivenessAreEqual = [&] (auto a, auto b) {
     394                HashSet<Tmp> livenessA;
     395                HashSet<Tmp> livenessB;
     396                for (Tmp tmp : a) {
     397                    if (tmp.isReg() && isDisallowedRegister(tmp.reg()))
     398                        continue;
     399                    livenessA.add(tmp);
     400                }
     401                for (Tmp tmp : b) {
     402                    if (tmp.isReg() && isDisallowedRegister(tmp.reg()))
     403                        continue;
     404                    livenessB.add(tmp);
     405                }
     406
     407                ASSERT(livenessA == livenessB);
     408            };
     409
     410            assertLivenessAreEqual(m_liveness->liveAtHead(block), liveness.liveAtHead(block));
     411            assertLivenessAreEqual(m_liveness->liveAtTail(block), liveness.liveAtTail(block));
     412        }
     413    }
    331414#endif
    332415}
     
    338421    TimingScope timingScope("Air::generateAndAllocateRegisters");
    339422
    340     insertBlocksForFlushAfterTerminalPatchpoints();
    341 
    342423    DisallowMacroScratchRegisterUsage disallowScratch(*m_jit);
    343424
    344     UnifiedTmpLiveness liveness(m_code);
    345     buildLiveRanges(liveness);
     425    buildLiveRanges(*m_liveness);
    346426
    347427    IndexMap<BasicBlock*, IndexMap<Reg, Tmp>> currentAllocationMap(m_code.size());
     
    356436        for (unsigned i = m_code.numEntrypoints(); i--;) {
    357437            BasicBlock* entrypoint = m_code.entrypoint(i).block();
    358             for (Tmp tmp : liveness.liveAtHead(entrypoint)) {
     438            for (Tmp tmp : m_liveness->liveAtHead(entrypoint)) {
    359439                if (tmp.isReg())
    360440                    currentAllocationMap[entrypoint][tmp.reg()] = tmp;
     
    443523            m_availableRegs[tmp.bank()].clear(reg);
    444524        }
     525
     526        ++m_globalInstIndex;
    445527
    446528        bool isReplayingSameInst = false;
     
    674756                }
    675757                if (!everySuccessorGetsOurRegisterState) {
    676                     for (Tmp tmp : liveness.liveAtTail(block)) {
     758                    for (Tmp tmp : m_liveness->liveAtTail(block)) {
    677759                        if (tmp.isReg() && isDisallowedRegister(tmp.reg()))
    678760                            continue;
     
    753835                m_map[tmp].reg = Reg();
    754836        }
     837
     838        ++m_globalInstIndex;
    755839    }
    756840
  • trunk/Source/JavaScriptCore/b3/air/AirAllocateRegistersAndStackAndGenerateCode.h

    r254087 r254788  
    4444
    4545    struct TmpData {
    46         StackSlot* spillSlot;
     46        StackSlot* spillSlot { nullptr };
    4747        Reg reg;
    4848    };
     
    8585    RegisterSet m_namedDefdRegs;
    8686    RegisterSet m_allowedRegisters;
     87    std::unique_ptr<UnifiedTmpLiveness> m_liveness;
    8788
    8889    struct PatchSpillData {
  • trunk/Source/JavaScriptCore/b3/air/AirLiveness.h

    r218794 r254788  
    3737template<typename Adapter>
    3838class Liveness : public WTF::Liveness<Adapter> {
     39    WTF_MAKE_FAST_ALLOCATED;
    3940public:
    4041    Liveness(Code& code)
Note: See TracChangeset for help on using the changeset viewer.