Changeset 248938 in webkit
- Timestamp:
- Aug 20, 2019 11:30:05 PM (5 years ago)
- Location:
- trunk
- Files:
-
- 12 added
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JSTests/ChangeLog
r248906 r248938 1 2019-08-20 Justin Michaud <justin_michaud@apple.com> 2 3 Identify memcpy loops in b3 4 https://bugs.webkit.org/show_bug.cgi?id=200181 5 6 Reviewed by Saam Barati. 7 8 * microbenchmarks/memcpy-loop.js: Added. 9 (doTest): 10 (let.arr1): 11 * microbenchmarks/memcpy-typed-loop-large.js: Added. 12 (doTest): 13 (let.arr1.new.Int32Array.1000000.let.arr2.new.Int32Array.1000000): 14 (arr2): 15 * microbenchmarks/memcpy-typed-loop-small.js: Added. 16 (doTest): 17 (16.let.arr1.new.Int32Array.size.let.arr2.new.Int32Array.size): 18 (16.arr2): 19 * microbenchmarks/memcpy-typed-loop-speculative.js: Added. 20 (doTest): 21 (let.arr1.new.Int32Array.10.let.arr2.new.Int32Array.10): 22 (arr2): 23 * microbenchmarks/memcpy-wasm-large.js: Added. 24 (typeof.WebAssembly.string_appeared_here.eq): 25 (typeof.WebAssembly.string_appeared_here.const.1.new.WebAssembly.Instance.new.WebAssembly.Module.new.Uint8Array): 26 * microbenchmarks/memcpy-wasm-medium.js: Added. 27 (typeof.WebAssembly.string_appeared_here.eq): 28 (typeof.WebAssembly.string_appeared_here.const.1.new.WebAssembly.Instance.new.WebAssembly.Module.new.Uint8Array): 29 * microbenchmarks/memcpy-wasm-small.js: Added. 30 (typeof.WebAssembly.string_appeared_here.eq): 31 (typeof.WebAssembly.string_appeared_here.const.1.new.WebAssembly.Instance.new.WebAssembly.Module.new.Uint8Array): 32 * microbenchmarks/memcpy-wasm.js: Added. 33 (typeof.WebAssembly.string_appeared_here.eq): 34 (typeof.WebAssembly.string_appeared_here.const.1.new.WebAssembly.Instance.new.WebAssembly.Module.new.Uint8Array): 35 * stress/memcpy-typed-loops.js: Added. 36 (noLoop): 37 (invalidStart): 38 (const.size.10.let.arr1.new.Int32Array.size.let.arr2.new.Int32Array.size): 39 (arr2): 40 * wasm/function-tests/memcpy-wasm-loop.js: Added. 41 (0.GetLocal.3.I32Const.1.I32Add.SetLocal.3.Br.1.End.End.End.WebAssembly): 42 (string_appeared_here): 43 1 44 2019-08-20 Yusuke Suzuki <ysuzuki@apple.com> 2 45 -
trunk/Source/JavaScriptCore/ChangeLog
r248933 r248938 1 2019-08-20 Justin Michaud <justin_michaud@apple.com> 2 3 Identify memcpy loops in b3 4 https://bugs.webkit.org/show_bug.cgi?id=200181 5 6 Reviewed by Saam Barati. 7 8 Add a new pass in B3 to identify one type of forward byte copy loop and replace it with a call to a custom version of memcpy 9 that will not cause GC tearing and have the correct behaviour when overlapping regions are passed in. 10 11 Microbenchmarks show memcpy-typed-loop-large is about 6x faster, and everything else is neutral. The optimization is disabled 12 on arm for now, until we add a memcpy implementation for it. 13 14 * JavaScriptCore.xcodeproj/project.pbxproj: 15 * Sources.txt: 16 * b3/B3Generate.cpp: 17 (JSC::B3::generateToAir): 18 * b3/B3ReduceLoopStrength.cpp: Added. 19 (JSC::B3::fastForwardCopy32): 20 (JSC::B3::ReduceLoopStrength::AddrInfo::appendAddr): 21 (JSC::B3::ReduceLoopStrength::ReduceLoopStrength): 22 (JSC::B3::ReduceLoopStrength::reduceByteCopyLoopsToMemcpy): 23 (JSC::B3::ReduceLoopStrength::hoistValue): 24 (JSC::B3::ReduceLoopStrength::run): 25 (JSC::B3::reduceLoopStrength): 26 * b3/B3ReduceLoopStrength.h: Added. 27 * b3/testb3.h: 28 * b3/testb3_1.cpp: 29 (run): 30 * b3/testb3_8.cpp: 31 (testFastForwardCopy32): 32 (testByteCopyLoop): 33 (testByteCopyLoopStartIsLoopDependent): 34 (testByteCopyLoopBoundIsLoopDependent): 35 (addCopyTests): 36 1 37 2019-08-20 Devin Rousso <drousso@apple.com> 2 38 -
trunk/Source/JavaScriptCore/JavaScriptCore.xcodeproj/project.pbxproj
r248925 r248938 1176 1176 70ECA6091AFDBEA200449739 /* TemplateObjectDescriptor.h in Headers */ = {isa = PBXBuildFile; fileRef = 70ECA6041AFDBEA200449739 /* TemplateObjectDescriptor.h */; settings = {ATTRIBUTES = (Private, ); }; }; 1177 1177 72AAF7CE1D0D31B3005E60BE /* JSCustomGetterSetterFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 72AAF7CC1D0D318B005E60BE /* JSCustomGetterSetterFunction.h */; }; 1178 73E3799422E0EF6500933565 /* B3ReduceLoopStrength.h in Headers */ = {isa = PBXBuildFile; fileRef = 73E3799322E0EF4F00933565 /* B3ReduceLoopStrength.h */; }; 1178 1179 7593C898BE714A64BE93A6E7 /* WasmContextInlines.h in Headers */ = {isa = PBXBuildFile; fileRef = A27958D7FA1142B0AC9E364D /* WasmContextInlines.h */; settings = {ATTRIBUTES = (Private, ); }; }; 1179 1180 790081391E95A8EC0052D7CD /* WasmModule.h in Headers */ = {isa = PBXBuildFile; fileRef = 790081371E95A8EC0052D7CD /* WasmModule.h */; settings = {ATTRIBUTES = (Private, ); }; }; … … 3928 3929 72AAF7CB1D0D318B005E60BE /* JSCustomGetterSetterFunction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = JSCustomGetterSetterFunction.cpp; sourceTree = "<group>"; }; 3929 3930 72AAF7CC1D0D318B005E60BE /* JSCustomGetterSetterFunction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JSCustomGetterSetterFunction.h; sourceTree = "<group>"; }; 3931 73E3799322E0EF4F00933565 /* B3ReduceLoopStrength.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = B3ReduceLoopStrength.h; path = b3/B3ReduceLoopStrength.h; sourceTree = "<group>"; }; 3932 73E3799522E0EF9100933565 /* B3ReduceLoopStrength.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = B3ReduceLoopStrength.cpp; path = b3/B3ReduceLoopStrength.cpp; sourceTree = "<group>"; }; 3930 3933 77B25CB2C3094A92A38E1DB3 /* JSModuleLoader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JSModuleLoader.h; sourceTree = "<group>"; }; 3931 3934 790081361E95A8EC0052D7CD /* WasmModule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = WasmModule.cpp; sourceTree = "<group>"; }; … … 5622 5625 43422A641C16221E00E2EB98 /* B3ReduceDoubleToFloat.cpp */, 5623 5626 43422A651C16221E00E2EB98 /* B3ReduceDoubleToFloat.h */, 5627 73E3799522E0EF9100933565 /* B3ReduceLoopStrength.cpp */, 5628 73E3799322E0EF4F00933565 /* B3ReduceLoopStrength.h */, 5624 5629 0FEC85B71BE1462F0080FF74 /* B3ReduceStrength.cpp */, 5625 5630 0FEC85B81BE1462F0080FF74 /* B3ReduceStrength.h */, … … 8893 8898 0F725CAA1C503DED00AD943A /* B3PureCSE.h in Headers */, 8894 8899 43422A671C16267800E2EB98 /* B3ReduceDoubleToFloat.h in Headers */, 8900 73E3799422E0EF6500933565 /* B3ReduceLoopStrength.h in Headers */, 8895 8901 0FEC85BD1BE1462F0080FF74 /* B3ReduceStrength.h in Headers */, 8896 8902 0FEC85351BDACDAC0080FF74 /* B3SlotBaseValue.h in Headers */, -
trunk/Source/JavaScriptCore/Sources.txt
r248878 r248938 157 157 b3/B3PureCSE.cpp 158 158 b3/B3ReduceDoubleToFloat.cpp 159 b3/B3ReduceLoopStrength.cpp 159 160 b3/B3ReduceStrength.cpp 160 161 b3/B3SSACalculator.cpp -
trunk/Source/JavaScriptCore/b3/B3Generate.cpp
r245035 r248938 49 49 #include "B3PureCSE.h" 50 50 #include "B3ReduceDoubleToFloat.h" 51 #include "B3ReduceLoopStrength.h" 51 52 #include "B3ReduceStrength.h" 52 53 #include "B3TimingScope.h" … … 92 93 eliminateDeadCode(procedure); 93 94 inferSwitches(procedure); 95 reduceLoopStrength(procedure); 94 96 if (Options::useB3TailDup()) 95 97 duplicateTails(procedure); -
trunk/Source/JavaScriptCore/b3/testb3.h
r248846 r248938 49 49 #include "B3NativeTraits.h" 50 50 #include "B3Procedure.h" 51 #include "B3ReduceLoopStrength.h" 51 52 #include "B3ReduceStrength.h" 52 53 #include "B3SlotBaseValue.h" … … 1017 1018 void addTupleTests(const char* filter, Deque<RefPtr<SharedTask<void()>>>&); 1018 1019 1020 void testFastForwardCopy32(); 1021 void testByteCopyLoop(); 1022 void testByteCopyLoopStartIsLoopDependent(); 1023 void testByteCopyLoopBoundIsLoopDependent(); 1024 1025 void addCopyTests(const char* filter, Deque<RefPtr<SharedTask<void()>>>&); 1026 1019 1027 bool shouldRun(const char* filter, const char* testName); 1020 1028 -
trunk/Source/JavaScriptCore/b3/testb3_1.cpp
r248710 r248938 539 539 addTupleTests(filter, tasks); 540 540 541 addCopyTests(filter, tasks); 542 541 543 RUN(testSpillGP()); 542 544 RUN(testSpillFP()); -
trunk/Source/JavaScriptCore/b3/testb3_8.cpp
r248178 r248938 867 867 } 868 868 869 void testFastForwardCopy32() 870 { 871 #if CPU(X86_64) 872 for (const bool aligned : { true, false }) { 873 for (const bool overlap : { false, true }) { 874 for (size_t arrsize : { 1, 4, 5, 6, 8, 10, 12, 16, 20, 40, 100, 1000}) { 875 size_t overlapAmount = 5; 876 877 uint32_t* arr1, *arr2; 878 879 if (overlap) { 880 arr1 = new uint32_t[arrsize * 2]; 881 arr2 = arr1 + (arrsize - overlapAmount); 882 } else { 883 arr1 = new uint32_t[arrsize]; 884 arr2 = new uint32_t[arrsize]; 885 } 886 887 if (!aligned && arrsize < 3) 888 continue; 889 if (overlap && arrsize <= overlapAmount + 3) 890 continue; 891 892 if (!aligned) { 893 ++arr1; 894 ++arr2; 895 arrsize -= 1; 896 overlapAmount -= 1; 897 } 898 899 for (size_t i = 0; i < arrsize; ++i) 900 arr1[i] = i; 901 902 fastForwardCopy32(arr2, arr1, arrsize); 903 904 if (overlap) { 905 for (size_t i = 0; i < arrsize - overlapAmount; ++i) 906 CHECK(arr2[i] == i); 907 for (size_t i = arrsize - overlapAmount; i < arrsize; ++i) 908 CHECK(arr2[i] == i - (arrsize - overlapAmount)); 909 } else { 910 for (size_t i = 0; i < arrsize; ++i) 911 CHECK(arr2[i] == i); 912 } 913 914 if (!aligned) { 915 --arr1; 916 --arr2; 917 } 918 919 if (!overlap) { 920 delete[] arr1; 921 delete[] arr2; 922 } else 923 delete[] arr1; 924 } 925 } 926 } 927 #endif 928 } 929 930 void testByteCopyLoop() 931 { 932 Procedure proc; 933 BasicBlock* root = proc.addBlock(); 934 BasicBlock* head = proc.addBlock(); 935 BasicBlock* update = proc.addBlock(); 936 BasicBlock* continuation = proc.addBlock(); 937 938 auto* arraySrc = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0); 939 auto* arrayDst = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1); 940 auto* arraySize = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR2); 941 auto* one = root->appendNew<Const32Value>(proc, Origin(), 1); 942 auto* two = root->appendNew<Const32Value>(proc, Origin(), 2); 943 UpsilonValue* startingIndex = root->appendNew<UpsilonValue>(proc, Origin(), root->appendNew<Const32Value>(proc, Origin(), 0)); 944 root->appendNew<Value>(proc, Jump, Origin()); 945 root->setSuccessors(FrequentedBlock(head)); 946 947 auto* index = head->appendNew<Value>(proc, Phi, Int32, Origin()); 948 startingIndex->setPhi(index); 949 auto* loadIndex = head->appendNew<Value>(proc, Add, Origin(), arraySrc, head->appendNew<Value>(proc, Shl, Origin(), index, two)); 950 auto* storeIndex = head->appendNew<Value>(proc, Add, Origin(), arrayDst, head->appendNew<Value>(proc, Shl, Origin(), index, two)); 951 head->appendNew<MemoryValue>(proc, Store, Origin(), head->appendNew<MemoryValue>(proc, Load, Int32, Origin(), loadIndex), storeIndex); 952 auto* newIndex = head->appendNew<Value>(proc, Add, Origin(), index, one); 953 auto* cmpValue = head->appendNew<Value>(proc, GreaterThan, Origin(), newIndex, arraySize); 954 head->appendNew<Value>(proc, Branch, Origin(), cmpValue); 955 head->setSuccessors(FrequentedBlock(continuation), FrequentedBlock(update)); 956 957 UpsilonValue* updateIndex = update->appendNew<UpsilonValue>(proc, Origin(), newIndex); 958 updateIndex->setPhi(index); 959 update->appendNew<Value>(proc, Jump, Origin()); 960 update->setSuccessors(FrequentedBlock(head)); 961 962 continuation->appendNewControlValue(proc, Return, Origin()); 963 964 int* arr1 = new int[3]; 965 int* arr2 = new int[3]; 966 967 arr1[0] = 0; 968 arr1[1] = 0; 969 arr1[2] = 0; 970 arr2[0] = 1; 971 arr2[1] = 2; 972 arr2[2] = 3; 973 974 compileAndRun<void>(proc, arr2, arr1, 3); 975 976 CHECK_EQ(arr1[0], 1); 977 CHECK_EQ(arr1[1], 2); 978 CHECK_EQ(arr1[2], 3); 979 980 delete[] arr1; 981 delete [] arr2; 982 } 983 984 void testByteCopyLoopStartIsLoopDependent() 985 { 986 Procedure proc; 987 BasicBlock* root = proc.addBlock(); 988 BasicBlock* head = proc.addBlock(); 989 BasicBlock* update = proc.addBlock(); 990 BasicBlock* continuation = proc.addBlock(); 991 992 auto* arraySrc = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0); 993 auto* arrayDst = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1); 994 auto* arraySize = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR2); 995 auto* one = root->appendNew<Const32Value>(proc, Origin(), 1); 996 auto* two = root->appendNew<Const32Value>(proc, Origin(), 2); 997 root->appendNew<Value>(proc, Jump, Origin()); 998 root->setSuccessors(FrequentedBlock(head)); 999 1000 UpsilonValue* startingIndex = head->appendNew<UpsilonValue>(proc, Origin(), head->appendNew<Const32Value>(proc, Origin(), 0)); 1001 auto* index = head->appendNew<Value>(proc, Phi, Int32, Origin()); 1002 startingIndex->setPhi(index); 1003 auto* loadIndex = head->appendNew<Value>(proc, Add, Origin(), arraySrc, head->appendNew<Value>(proc, Shl, Origin(), index, two)); 1004 auto* storeIndex = head->appendNew<Value>(proc, Add, Origin(), arrayDst, head->appendNew<Value>(proc, Shl, Origin(), index, two)); 1005 head->appendNew<MemoryValue>(proc, Store, Origin(), head->appendNew<MemoryValue>(proc, Load, Int32, Origin(), loadIndex), storeIndex); 1006 auto* newIndex = head->appendNew<Value>(proc, Add, Origin(), index, one); 1007 auto* cmpValue = head->appendNew<Value>(proc, GreaterThan, Origin(), newIndex, arraySize); 1008 head->appendNew<Value>(proc, Branch, Origin(), cmpValue); 1009 head->setSuccessors(FrequentedBlock(continuation), FrequentedBlock(update)); 1010 1011 UpsilonValue* updateIndex = update->appendNew<UpsilonValue>(proc, Origin(), newIndex); 1012 updateIndex->setPhi(index); 1013 update->appendNew<Value>(proc, Jump, Origin()); 1014 update->setSuccessors(FrequentedBlock(head)); 1015 1016 continuation->appendNewControlValue(proc, Return, Origin()); 1017 1018 int* arr1 = new int[3]; 1019 int* arr2 = new int[3]; 1020 1021 arr1[0] = 0; 1022 arr1[1] = 0; 1023 arr1[2] = 0; 1024 arr2[0] = 1; 1025 arr2[1] = 2; 1026 arr2[2] = 3; 1027 1028 compileAndRun<void>(proc, arr2, arr1, 0); 1029 1030 CHECK_EQ(arr1[0], 1); 1031 CHECK_EQ(arr1[1], 0); 1032 CHECK_EQ(arr1[2], 0); 1033 1034 delete[] arr1; 1035 delete [] arr2; 1036 } 1037 1038 void testByteCopyLoopBoundIsLoopDependent() 1039 { 1040 Procedure proc; 1041 BasicBlock* root = proc.addBlock(); 1042 BasicBlock* head = proc.addBlock(); 1043 BasicBlock* update = proc.addBlock(); 1044 BasicBlock* continuation = proc.addBlock(); 1045 1046 auto* arraySrc = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0); 1047 auto* arrayDst = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1); 1048 auto* one = root->appendNew<Const32Value>(proc, Origin(), 1); 1049 auto* two = root->appendNew<Const32Value>(proc, Origin(), 2); 1050 UpsilonValue* startingIndex = root->appendNew<UpsilonValue>(proc, Origin(), root->appendNew<Const32Value>(proc, Origin(), 0)); 1051 root->appendNew<Value>(proc, Jump, Origin()); 1052 root->setSuccessors(FrequentedBlock(head)); 1053 1054 auto* index = head->appendNew<Value>(proc, Phi, Int32, Origin()); 1055 startingIndex->setPhi(index); 1056 auto* loadIndex = head->appendNew<Value>(proc, Add, Origin(), arraySrc, head->appendNew<Value>(proc, Shl, Origin(), index, two)); 1057 auto* storeIndex = head->appendNew<Value>(proc, Add, Origin(), arrayDst, head->appendNew<Value>(proc, Shl, Origin(), index, two)); 1058 head->appendNew<MemoryValue>(proc, Store, Origin(), head->appendNew<MemoryValue>(proc, Load, Int32, Origin(), loadIndex), storeIndex); 1059 auto* newIndex = head->appendNew<Value>(proc, Add, Origin(), index, one); 1060 auto* cmpValue = head->appendNew<Value>(proc, GreaterThan, Origin(), newIndex, index); 1061 head->appendNew<Value>(proc, Branch, Origin(), cmpValue); 1062 head->setSuccessors(FrequentedBlock(continuation), FrequentedBlock(update)); 1063 1064 UpsilonValue* updateIndex = update->appendNew<UpsilonValue>(proc, Origin(), newIndex); 1065 updateIndex->setPhi(index); 1066 update->appendNew<Value>(proc, Jump, Origin()); 1067 update->setSuccessors(FrequentedBlock(head)); 1068 1069 continuation->appendNewControlValue(proc, Return, Origin()); 1070 1071 int* arr1 = new int[3]; 1072 int* arr2 = new int[3]; 1073 1074 arr1[0] = 0; 1075 arr1[1] = 0; 1076 arr1[2] = 0; 1077 arr2[0] = 1; 1078 arr2[1] = 2; 1079 arr2[2] = 3; 1080 1081 compileAndRun<void>(proc, arr2, arr1, 3); 1082 1083 CHECK_EQ(arr1[0], 1); 1084 CHECK_EQ(arr1[1], 0); 1085 CHECK_EQ(arr1[2], 0); 1086 1087 delete[] arr1; 1088 delete [] arr2; 1089 } 1090 1091 void addCopyTests(const char* filter, Deque<RefPtr<SharedTask<void()>>>& tasks) 1092 { 1093 RUN(testFastForwardCopy32()); 1094 RUN(testByteCopyLoop()); 1095 RUN(testByteCopyLoopStartIsLoopDependent()); 1096 RUN(testByteCopyLoopBoundIsLoopDependent()); 1097 } 1098 869 1099 #endif // ENABLE(B3_JIT)
Note: See TracChangeset
for help on using the changeset viewer.