Changeset 249816 in webkit


Ignore:
Timestamp:
Sep 12, 2019 2:47:25 PM (5 years ago)
Author:
sbarati@apple.com
Message:

[WHLSL] Slim down WSLMatrix and inline constructors in native code
https://bugs.webkit.org/show_bug.cgi?id=201568

Reviewed by Robin Morisset.

Before, our WSL Matrix in MSL had templates to figure out how we're
constructing it. For example, with a list of elements, or a list of
columns. However, we can remove this template code since when we're
emitting Metal code, we know exactly how we're constructing the WSL
matrix. So the NativeFunctionWriter now inlines the proper stores
into the WSLMatrix elements.

This patch speeds up Metal compile times in boids by ~4ms (16%) with
a p-value of 0.0001.

Covered by existing tests.

  • Modules/webgpu/WHLSL/Metal/WHLSLMetalCodeGenerator.cpp:

(WebCore::WHLSL::Metal::metalCodePrologue):

  • Modules/webgpu/WHLSL/Metal/WHLSLNativeFunctionWriter.cpp:

(WebCore::WHLSL::Metal::inlineNativeFunction):

Location:
trunk/Source/WebCore
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/WebCore/ChangeLog

    r249815 r249816  
     1 2019-09-12  Saam Barati  <sbarati@apple.com>
     2
     3        [WHLSL] Slim down WSLMatrix and inline constructors in native code
     4        https://bugs.webkit.org/show_bug.cgi?id=201568
     5
     6        Reviewed by Robin Morisset.
     7
     8        Before, our WSL Matrix in MSL had templates to figure out how we're
     9        constructing it. For example, with a list of elements, or a list of
     10        columns. However, we can remove this template code since when we're
     11        emitting Metal code, we know exactly how we're constructing the WSL
     12        matrix. So the NativeFunctionWriter now inlines the proper stores
     13        into the WSLMatrix elements.
     14       
     15        This patch speeds up Metal compile times in boids by ~4ms (16%) with
     16        a p-value of 0.0001.
     17
     18        Covered by existing tests.
     19
     20        * Modules/webgpu/WHLSL/Metal/WHLSLMetalCodeGenerator.cpp:
     21        (WebCore::WHLSL::Metal::metalCodePrologue):
     22        * Modules/webgpu/WHLSL/Metal/WHLSLNativeFunctionWriter.cpp:
     23        (WebCore::WHLSL::Metal::inlineNativeFunction):
     24
    1 25 2019-09-12  Wenson Hsieh  <wenson_hsieh@apple.com>
    226
  • trunk/Source/WebCore/Modules/webgpu/WHLSL/Metal/WHLSLMetalCodeGenerator.cpp

    r249351 r249816  
    5959        "    vec<T, Rows> columns[Cols];\n"
    6060        "    private:\n"
    61         "    template <typename U, int... R>\n"
    62         "    static vec<T, Rows> build_col(initializer_list<U> col, _integer_sequence<int, R...>)\n"
    63         "    {\n"
    64         "        return {(R < col.size() ? *(col.begin() + R) : U())...};\n"
    65         "    }\n"
    66         "    template <int... R>\n"
    67         "    static vec<T, Rows> build_full_col(int c, initializer_list<T> elems, _integer_sequence<int, R...>)\n"
    68         "    {\n"
    69         "        return {*(elems.begin() + c * Rows + R)...};\n"
    70         "    }\n"
    71         "    struct cols_init_tag { };\n"
    72         "    struct cols_all_tag { };\n"
    73         "    struct elems_all_tag { };\n"
    74         "    template <int... C>\n"
    75         "    inline explicit WSLMatrix(cols_init_tag, initializer_list<vec<T, Rows>> cols, _integer_sequence<int, C...>) thread\n"
    76         "        : columns{(C < cols.size() ? *(cols.begin() + C) : vec<T, Rows>())...}\n"
    77         "    {\n"
    78         "    }\n"
    79         "    template <typename... U>\n"
    80         "    inline explicit WSLMatrix(cols_all_tag, U... cols) thread\n"
    81         "        : columns{ cols... }\n"
    82         "    {\n"
    83         "    }\n"
    84         "    template <typename... U>\n"
    85         "    inline explicit WSLMatrix(elems_all_tag, U... elems) thread\n"
    86         "        : WSLMatrix({T(elems)...}, _make_integer_sequence<int, Cols>())\n"
    87         "        {\n"
    88         "        }\n"
    89         "    template <int... C>\n"
    90         "    inline explicit WSLMatrix(initializer_list<T> elems, _integer_sequence<int, C...>) thread\n"
    91         "        : columns{build_full_col(C, elems, _make_integer_sequence<int, Rows>())...}\n"
    92         "    {\n"
    93         "    }\n"
    94         "    template <int... C>\n"
    95         "    inline explicit WSLMatrix(cols_init_tag, initializer_list<vec<T, Rows>> cols, _integer_sequence<int, C...>) constant\n"
    96         "        : columns{(C < cols.size() ? *(cols.begin() + C) : vec<T, Rows>())...}\n"
    97         "    {\n"
    98         "    }\n"
    99         "    template <typename... U>\n"
    100         "    inline explicit WSLMatrix(cols_all_tag, U... cols) constant\n"
    101         "        : columns{ cols... }\n"
    102         "    {\n"
    103         "    }\n"
    104         "    template <typename... U>\n"
    105         "    inline explicit WSLMatrix(elems_all_tag, U... elems) constant\n"
    106         "        : WSLMatrix({T(elems)...}, _make_integer_sequence<int, Cols>())\n"
    107         "        {\n"
    108         "        }\n"
    109         "    template <int... C>\n"
    110         "    inline explicit WSLMatrix(initializer_list<T> elems, _integer_sequence<int, C...>) constant\n"
    111         "        : columns{build_full_col(C, elems, _make_integer_sequence<int, Rows>())...}\n"
    112         "    {\n"
    113         "    }\n"
    11461        "    public:\n"
    11562        "    inline WSLMatrix() thread = default;\n"
    116         "    inline WSLMatrix(initializer_list<vec<T, Rows>> cols) thread\n"
    117         "        : WSLMatrix(cols_init_tag(), cols, _make_integer_sequence<int, Cols>())\n"
    118         "    {\n"
    119         "    }\n"
    120         "    template <typename... U>\n"
    121         "    inline explicit WSLMatrix(U... vals) thread\n"
    122         "        : WSLMatrix(conditional_t<sizeof...(U) == Cols, cols_all_tag, elems_all_tag>(), vals...)\n"
    123         "    {\n"
    124         "    }\n"
    12563        "    inline WSLMatrix() constant = default;\n"
    126         "    inline WSLMatrix(initializer_list<vec<T, Rows>> cols) constant\n"
    127         "        : WSLMatrix(cols_init_tag(), cols, _make_integer_sequence<int, Cols>())\n"
    128         "    {\n"
    129         "    }\n"
    130         "    inline explicit WSLMatrix(T val) constant\n"
    131         "        : WSLMatrix(val, _make_integer_sequence<int, Cols>())\n"
    132         "    {\n"
    133         "    }\n"
    134         "    template <typename... U>\n"
    135         "    inline explicit WSLMatrix(U... vals) constant\n"
    136         "        : WSLMatrix(conditional_t<sizeof...(U) == Cols, cols_all_tag, elems_all_tag>(), vals...)\n"
    137         "    {\n"
    138         "    }\n"
    13964        "    inline WSLMatrix(const thread WSLMatrix<T, Cols, Rows> &that) thread = default;\n"
    140         "    template <typename U>\n"
    141         "    inline explicit WSLMatrix(const thread WSLMatrix<U, Cols, Rows> &that) thread\n"
    142         "        : WSLMatrix(that, _make_integer_sequence<int, Cols>())\n"
    143         "    {\n"
    144         "    }\n"
    14565        "    inline WSLMatrix(const device WSLMatrix<T, Cols, Rows> &that) thread = default;\n"
    146         "    template <typename U>\n"
    147         "    inline explicit WSLMatrix(const device WSLMatrix<U, Cols, Rows> &that) thread\n"
    148         "        : WSLMatrix(that, _make_integer_sequence<int, Cols>())\n"
    149         "    {\n"
    150         "    }\n"
    15166        "    inline WSLMatrix(const constant WSLMatrix<T, Cols, Rows> &that) thread = default;\n"
    152         "    template <typename U>\n"
    153         "    inline explicit WSLMatrix(const constant WSLMatrix<U, Cols, Rows> &that) thread\n"
    154         "        : WSLMatrix(that, _make_integer_sequence<int, Cols>())\n"
    155         "    {\n"
    156         "    }\n"
    15767        "    inline WSLMatrix(const threadgroup WSLMatrix<T, Cols, Rows> &that) thread = default;\n"
    158         "    template <typename U>\n"
    159         "    inline explicit WSLMatrix(const threadgroup WSLMatrix<U, Cols, Rows> &that) thread\n"
    160         "        : WSLMatrix(that, _make_integer_sequence<int, Cols>())\n"
    161         "    {\n"
    162         "    }\n"
    16368        "    inline WSLMatrix(const thread WSLMatrix<T, Cols, Rows> &that) constant = default;\n"
    164         "    template <typename U>\n"
    165         "    inline explicit WSLMatrix(const thread WSLMatrix<U, Cols, Rows> &that) constant\n"
    166         "        : WSLMatrix(that, _make_integer_sequence<int, Cols>())\n"
    167         "    {\n"
    168         "    }\n"
    16969        "    inline WSLMatrix(const device WSLMatrix<T, Cols, Rows> &that) constant = default;\n"
    170         "    template <typename U>\n"
    171         "    inline explicit WSLMatrix(const device WSLMatrix<U, Cols, Rows> &that) constant\n"
    172         "        : WSLMatrix(that, _make_integer_sequence<int, Cols>())\n"
    173         "    {\n"
    174         "    }\n"
    17570        "    inline WSLMatrix(const constant WSLMatrix<T, Cols, Rows> &that) constant = default;\n"
    176         "    template <typename U>\n"
    177         "    inline explicit WSLMatrix(const constant WSLMatrix<U, Cols, Rows> &that) constant\n"
    178         "        : WSLMatrix(that, _make_integer_sequence<int, Cols>())\n"
    179         "    {\n"
    180         "    }\n"
    18171        "    inline WSLMatrix(const threadgroup WSLMatrix<T, Cols, Rows> &that) constant = default;\n"
    182         "    template <typename U>\n"
    183         "    inline explicit WSLMatrix(const threadgroup WSLMatrix<U, Cols, Rows> &that) constant\n"
    184         "        : WSLMatrix(that, _make_integer_sequence<int, Cols>())\n"
    185         "    {\n"
    186         "    }\n"
    18772        "    public:\n"
    18873        "    inline thread vec<T, Rows> &operator[](int r) thread\n"
  • trunk/Source/WebCore/Modules/webgpu/WHLSL/Metal/WHLSLNativeFunctionWriter.cpp

    r249453 r249816  
    183183
    184184        if (auto* matrixType = asMatrixType(returnType)) {
    185             stringBuilder.append(metalReturnTypeName, '(');
    186             for (size_t i = 0; i < args.size(); ++i) {
    187                 if (i)
    188                     stringBuilder.append(", ");
    189                 stringBuilder.append(args[i]);
     185            // We're either constructing with all individual elements, or with
     186            // vectors for each column.
     187
     188            stringBuilder.append('(');
     189            if (args.size() == matrixType->numberOfMatrixColumns()) {
     190                // Constructing with vectors for each column.
     191                for (size_t i = 0; i < args.size(); ++i) {
     192                    if (i)
     193                        stringBuilder.append(", ");
     194                    stringBuilder.append(resultName, ".columns[", i, "] = ", args[i]);
     195                }
     196            } else {
     197                // Constructing with all elements.
     198                RELEASE_ASSERT(args.size() == matrixType->numberOfMatrixColumns() * matrixType->numberOfMatrixRows());
     199
     200                size_t argNumber = 0;
     201                for (size_t i = 0; i < matrixType->numberOfMatrixColumns(); ++i) {
     202                    for (size_t j = 0; j < matrixType->numberOfMatrixRows(); ++j) {
     203                        if (argNumber)
     204                            stringBuilder.append(", ");
     205                        stringBuilder.append(resultName, ".columns[", i, "][", j, "] = ", args[argNumber]);
     206                        ++argNumber;
     207                    }
     208                }
    190209            }
    191             stringBuilder.append(')');
     210
     211            stringBuilder.append(", ", resultName, ')');
    192212            return;
    193213        }
Note: See TracChangeset for help on using the changeset viewer.