Changeset 199115 in webkit


Timestamp: Apr 6, 2016 1:52:11 PM
Author: ggaren@apple.com
Message:

bmalloc: handle aligned allocations on the fast path
https://bugs.webkit.org/show_bug.cgi?id=156302

Reviewed by Michael Saboff.

This helps keep the JavaScriptCore GC on the fast path, and it also
helps avoid fragmentation on our website stress test:

nimlang: 209,584 kB → 198,076 kB (1.06x smaller)

  • bmalloc/Allocator.cpp:

(bmalloc::Allocator::allocate): Because we arrange for power-of-two size
classes to allocate at power-of-two alignments, we can allocate any
small aligned request on the small path.
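
A minimal, self-contained sketch of the resulting fast path (the smallMax value and the helper definitions below are simplified assumptions for illustration; bmalloc defines its own isPowerOfTwo and roundUpToMultipleOf):

    #include <cassert>
    #include <cstddef>

    // Simplified stand-ins for bmalloc's helpers (illustrative assumptions).
    constexpr std::size_t smallMax = 1024;

    constexpr bool isPowerOfTwo(std::size_t n) { return n && !(n & (n - 1)); }

    std::size_t roundUpToMultipleOf(std::size_t divisor, std::size_t x)
    {
        assert(isPowerOfTwo(divisor));
        return (x + divisor - 1) & ~(divisor - 1);
    }

    // The fast path in a nutshell: rounding the request size up to a multiple
    // of a power-of-two alignment selects a size class whose objects are both
    // spaced at that alignment and, starting from an aligned page, placed at
    // it, so a plain small allocation suffices.
    std::size_t alignedRequestSize(std::size_t size, std::size_t alignment)
    {
        assert(isPowerOfTwo(alignment));
        if (!size)
            size = alignment;
        assert(size <= smallMax && alignment <= smallMax);
        return roundUpToMultipleOf(alignment, size); // e.g. size 24, alignment 32 -> 32
    }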

  • bmalloc/Chunk.h:

(bmalloc::Chunk::bytes):
(bmalloc::Chunk::lines):
(bmalloc::Chunk::pages):
(bmalloc::Chunk::boundaryTags):
(bmalloc::Chunk::objectType): Moved some code around to provide a better
API.

(bmalloc::Chunk::Chunk): Moved this code to VMHeap.

(bmalloc::Chunk::offset):
(bmalloc::Chunk::object): Use our new bytes() helper function.

  • bmalloc/VMHeap.cpp:

(bmalloc::VMHeap::allocateChunk): Moved code here from Chunk.

(bmalloc::VMHeap::allocateSmallChunk): Ensure that power-of-two page
sizes always begin allocation at the same alignment. Power-of-two object
sizes always request power-of-two page sizes (since that's the least
wasteful option), so if we also ensure that power-of-two page sizes get
power-of-two alignment, then everything is aligned for all small objects.
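
A sketch of that guarantee, reusing the simplified helpers from the sketch above (the function name and parameters here are hypothetical, not bmalloc API):

    // Offset of the first small page inside a chunk. Chunks are chunkSize-
    // aligned, so rounding the metadata prefix up to a power-of-two pageSize
    // puts every page, and every power-of-two object packed into it, on a
    // pageSize boundary.
    std::size_t firstPageOffset(std::size_t pageSize, std::size_t physicalPageSize,
                                std::size_t metadataBytes)
    {
        std::size_t alignment = physicalPageSize;  // default alignment
        if (isPowerOfTwo(pageSize))
            alignment = pageSize;                  // stronger guarantee for pow2 pages
        return roundUpToMultipleOf(alignment, metadataBytes);
    }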

Location: trunk/Source/bmalloc
Files: 4 edited

  • trunk/Source/bmalloc/ChangeLog

    (This diff adds the ChangeLog entry quoted in the Message above.)
  • trunk/Source/bmalloc/bmalloc/Allocator.cpp

    r198700 → r199115

             return result;
         }
    -
    -    if (size <= smallMax && alignment <= smallLineSize) {
    -        size_t alignmentMask = alignment - 1;
    -        while (void* p = allocate(size)) {
    -            if (!test(p, alignmentMask))
    -                return p;
    -            m_deallocator.deallocate(p);
    -        }
    -    }
    +
    +    if (!size)
    +        size = alignment;
    +
    +    if (size <= smallMax && alignment <= smallMax)
    +        return allocate(roundUpToMultipleOf(alignment, size));

         if (size <= largeMax && alignment <= largeMax) {
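
To see why the simpler code above is sufficient, here is a small check of the underlying invariant (a hypothetical model, not bmalloc code): objects whose size is a multiple of a power-of-two alignment, bump-allocated from an aligned base, are all aligned.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // If base % A == 0 and objectSize % A == 0, then (base + i * objectSize) % A
    // == 0 for every i. This is why allocate(roundUpToMultipleOf(alignment, size))
    // returns aligned memory without the old allocate/test/retry loop.
    void checkBumpAlignment(std::uintptr_t base, std::size_t objectSize,
                            std::size_t A, std::size_t count)
    {
        assert(base % A == 0 && objectSize % A == 0);
        for (std::size_t i = 0; i < count; ++i)
            assert((base + i * objectSize) % A == 0);
    }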
  • trunk/Source/bmalloc/bmalloc/Chunk.h

    r198995 → r199115

     class Chunk {
    +    // Our metadata layout includes a left and right edge sentinel.
    +    // Metadata takes up enough space to leave at least the first two
    +    // boundary tag slots unused.
    +    //
    +    //      So, boundary tag space looks like this:
    +    //
    +    //          [OOXXXXX...]
    +    //
    +    //      And BoundaryTag::get subtracts one, producing:
    +    //
    +    //          [OXXXXX...O].
    +    //
    +    // We use the X's for boundary tags and the O's for edge sentinels.
    +
    +    static const size_t boundaryTagCount = chunkSize / largeMin;
    +    static_assert(boundaryTagCount > 2, "Chunk must have space for two sentinel boundary tags");
    +
     public:
         static Chunk* get(void*);

    …

         SmallLine* line(size_t offset);

    +    char* bytes() { return reinterpret_cast<char*>(this); }
         SmallLine* lines() { return m_lines.begin(); }
         SmallPage* pages() { return m_pages.begin(); }
    -
    -    char* begin() { return roundUpToMultipleOf(vmPageSizePhysical(), m_memory); }
    -    char* end() { return reinterpret_cast<char*>(this) + chunkSize; }
    -    size_t size() { return end() - begin(); }
    +    std::array<BoundaryTag, boundaryTagCount>& boundaryTags() { return m_boundaryTags; }

         ObjectType objectType() { return m_objectType; }

     private:
    -    static const size_t boundaryTagCount = chunkSize / largeMin;
    -    static_assert(boundaryTagCount > 2, "Chunk must have space for two sentinel boundary tags");
    -
    -    // Our metadata layout includes a left and right edge sentinel.
    -    // Metadata takes up enough space to leave at least the first two
    -    // boundary tag slots unused.
    -    //
    -    //      So, boundary tag space looks like this:
    -    //
    -    //          [OOXXXXX...]
    -    //
    -    //      And BoundaryTag::get subtracts one, producing:
    -    //
    -    //          [OXXXXX...O].
    -    //
    -    // We use the X's for boundary tags and the O's for edge sentinels.
    -
         union {
             // The first few bytes of metadata cover the metadata region, so they're

    …

             std::array<BoundaryTag, boundaryTagCount> m_boundaryTags;
         };
    -    char m_memory[];
     };

    …

         : m_objectType(objectType)
     {
    -    if (objectType != ObjectType::Large)
    -        return;
    -
    -    Range range(begin(), size());
    -    BASSERT(range.size() <= largeObjectMax);
    -
    -    BeginTag* beginTag = Chunk::beginTag(range.begin());
    -    beginTag->setRange(range);
    -    beginTag->setFree(true);
    -    beginTag->setVMState(VMState::Virtual);
    -
    -    EndTag* endTag = Chunk::endTag(range.begin(), range.size());
    -    endTag->init(beginTag);
    -
    -    // Mark the left and right edges of our range as allocated. This naturally
    -    // prevents merging logic from overflowing left (into metadata) or right
    -    // (beyond our chunk), without requiring special-case checks.
    -
    -    EndTag* leftSentinel = beginTag->prev();
    -    BASSERT(leftSentinel >= m_boundaryTags.begin());
    -    BASSERT(leftSentinel < m_boundaryTags.end());
    -    leftSentinel->initSentinel();
    -
    -    BeginTag* rightSentinel = endTag->next();
    -    BASSERT(rightSentinel >= m_boundaryTags.begin());
    -    BASSERT(rightSentinel < m_boundaryTags.end());
    -    rightSentinel->initSentinel();
     }

    …

     {
         BASSERT(object >= this);
    -    BASSERT(object < reinterpret_cast<char*>(this) + chunkSize);
    -    return static_cast<char*>(object) - reinterpret_cast<char*>(this);
    +    BASSERT(object < bytes() + chunkSize);
    +    return static_cast<char*>(object) - bytes();
     }

     inline char* Chunk::object(size_t offset)
     {
    -    return reinterpret_cast<char*>(this) + offset;
    +    return bytes() + offset;
     }
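
The sentinel comment relocated above can be pictured with a toy example (slot counts are hypothetical; bmalloc's real BoundaryTag::get maps a chunk offset to a tag slot):

    // Toy boundary-tag array for a chunk with 8 slots. Chunk metadata occupies
    // the space covered by the first two slots, so they never describe objects:
    //
    //     raw slots:       [O O X X X X X X]    i.e. "[OOXXXXX...]"
    //
    // Subtracting one slot in the getter shifts every lookup left: slot 0
    // becomes the left edge sentinel, and a lookup one past the last object
    // lands on an unused slot that serves as the right sentinel:
    //
    //     after shift:     [O X X X X X X O]    i.e. "[OXXXXX...O]"
    //
    // Merging code can then read one tag to the left or right of any range
    // without special bounds checks.
    std::size_t tagSlot(std::size_t offsetInLargeMinUnits) // toy BoundaryTag::get
    {
        return offsetInLargeMinUnits - 1; // the "subtracts one" from the comment
    }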
  • trunk/Source/bmalloc/bmalloc/VMHeap.cpp

    r198995 → r199115

     #endif

    -    return LargeObject(chunk->begin());
    +    size_t alignment = largeAlignment;
    +    size_t metadataSize = roundUpToMultipleOf(alignment, sizeof(Chunk));
    +
    +    Range range(chunk->bytes() + metadataSize, chunkSize - metadataSize);
    +    BASSERT(range.size() <= largeObjectMax);
    +
    +    BeginTag* beginTag = Chunk::beginTag(range.begin());
    +    beginTag->setRange(range);
    +    beginTag->setFree(true);
    +    beginTag->setVMState(VMState::Virtual);
    +
    +    EndTag* endTag = Chunk::endTag(range.begin(), range.size());
    +    endTag->init(beginTag);
    +
    +    // Mark the left and right edges of our range as allocated. This naturally
    +    // prevents merging logic from overflowing left (into metadata) or right
    +    // (beyond our chunk), without requiring special-case checks.
    +
    +    EndTag* leftSentinel = beginTag->prev();
    +    BASSERT(leftSentinel >= chunk->boundaryTags().begin());
    +    BASSERT(leftSentinel < chunk->boundaryTags().end());
    +    leftSentinel->initSentinel();
    +
    +    BeginTag* rightSentinel = endTag->next();
    +    BASSERT(rightSentinel >= chunk->boundaryTags().begin());
    +    BASSERT(rightSentinel < chunk->boundaryTags().end());
    +    rightSentinel->initSentinel();
    +
    +    return LargeObject(range.begin());
     }

    …

         size_t smallPageCount = pageSize / smallPageSize;

    -    Object begin(chunk->begin());
    -    Object end(begin + chunk->size());
    +    // If our page size is a power of two, we align to it in order to guarantee
    +    // that we can service aligned allocation requests at the same power of two.
    +    size_t alignment = vmPageSizePhysical();
    +    if (isPowerOfTwo(pageSize))
    +        alignment = pageSize;
    +    size_t metadataSize = roundUpToMultipleOf(alignment, sizeof(Chunk));
    +
    +    Object begin(chunk, metadataSize);
    +    Object end(chunk, chunkSize);

         for (Object it = begin; it + pageSize <= end; it = it + pageSize) {
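
Plugging hypothetical numbers into the new arithmetic (reusing the simplified helpers from earlier; 10000 is a made-up stand-in for sizeof(Chunk)):

    void exampleSmallChunkLayout()
    {
        std::size_t pageSize = 16384;        // a power-of-two small page size
        std::size_t physicalPageSize = 4096; // stand-in for vmPageSizePhysical()

        std::size_t alignment = isPowerOfTwo(pageSize) ? pageSize : physicalPageSize;
        std::size_t metadataSize = roundUpToMultipleOf(alignment, 10000);
        assert(metadataSize == 16384);       // first page begins 16 kB-aligned

        // Pages then sit at metadataSize + k * pageSize within the chunk, and
        // since pageSize is a power of two, each page is pageSize-aligned.
        // Power-of-two objects packed into an aligned page (e.g. 1024-byte
        // objects) therefore start on their own alignment, which is exactly
        // what the Allocator fast path relies on.
    }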