Changeset 209120 in webkit
- Timestamp:
- Nov 29, 2016 8:54:04 PM (7 years ago)
- Location:
- trunk/Source
- Files:
-
- 40 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/JavaScriptCore/ChangeLog
r209113 r209120

2016-11-29 Commit Queue <commit-queue@webkit.org>

    Unreviewed, rolling out r209058 and r209074.
    https://bugs.webkit.org/show_bug.cgi?id=165188

    These changes caused API test StringBuilderTest.Equal to crash
    and/or fail. (Requested by ryanhaddad on #webkit).

    Reverted changesets:

    "Streamline and speed up tokenizer and segmented string
    classes"
    https://bugs.webkit.org/show_bug.cgi?id=165003
    http://trac.webkit.org/changeset/209058

    "REGRESSION (r209058): API test StringBuilderTest.Equal
    crashing"
    https://bugs.webkit.org/show_bug.cgi?id=165142
    http://trac.webkit.org/changeset/209074

2016-11-29 Caitlin Potter <caitp@igalia.com>

-
trunk/Source/JavaScriptCore/runtime/JSONObject.cpp
r209058 r209120 1 1 /* 2 * Copyright (C) 2009 -2016 Apple Inc. All rights reserved.2 * Copyright (C) 2009, 2016 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 355 355 356 356 if (value.isString()) { 357 builder.appendQuotedJSONString(asString(value)->v iewWithUnderlyingString(*m_exec).view);357 builder.appendQuotedJSONString(asString(value)->value(m_exec)); 358 358 return StringifySucceeded; 359 359 } -
trunk/Source/WTF/ChangeLog
r209110 r209120

2016-11-29 Commit Queue <commit-queue@webkit.org>

    Unreviewed, rolling out r209058 and r209074.
    https://bugs.webkit.org/show_bug.cgi?id=165188

    These changes caused API test StringBuilderTest.Equal to crash
    and/or fail. (Requested by ryanhaddad on #webkit).

    Reverted changesets:

    "Streamline and speed up tokenizer and segmented string
    classes"
    https://bugs.webkit.org/show_bug.cgi?id=165003
    http://trac.webkit.org/changeset/209058

    "REGRESSION (r209058): API test StringBuilderTest.Equal
    crashing"
    https://bugs.webkit.org/show_bug.cgi?id=165142
    http://trac.webkit.org/changeset/209074

2016-11-29 Simon Fraser <simon.fraser@apple.com>

-
trunk/Source/WTF/wtf/text/StringBuilder.cpp
r209058 r209120 1 1 /* 2 * Copyright (C) 2010 -2016 Apple Inc. All rights reserved.2 * Copyright (C) 2010, 2013, 2016 Apple Inc. All rights reserved. 3 3 * Copyright (C) 2012 Google Inc. All rights reserved. 4 4 * … … 30 30 #include "IntegerToStringConversion.h" 31 31 #include "MathExtras.h" 32 #include "WTFString.h" 32 33 #include <wtf/dtoa.h> 33 34 … … 38 39 static const unsigned minimumCapacity = 16; 39 40 return std::max(requiredLength, std::max(minimumCapacity, capacity * 2)); 40 }41 42 template<> ALWAYS_INLINE LChar* StringBuilder::bufferCharacters<LChar>()43 {44 ASSERT(m_is8Bit);45 return m_bufferCharacters8;46 }47 48 template<> ALWAYS_INLINE UChar* StringBuilder::bufferCharacters<UChar>()49 {50 ASSERT(!m_is8Bit);51 return m_bufferCharacters16;52 41 } 53 42 … … 109 98 { 110 99 ASSERT(m_is8Bit); 111 112 100 // Copy the existing data into a new buffer, set result to point to the end of the existing data. 113 101 auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters8); … … 125 113 { 126 114 ASSERT(!m_is8Bit); 127 128 115 // Copy the existing data into a new buffer, set result to point to the end of the existing data. 129 116 auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16); … … 138 125 // Allocate a new 16 bit buffer, copying in currentCharacters (which is 8 bit and may come 139 126 // from either m_string or m_buffer, neither will be reassigned until the copy has completed). 140 void StringBuilder::allocateBufferUp convert(const LChar* currentCharacters, unsigned requiredLength)127 void StringBuilder::allocateBufferUpConvert(const LChar* currentCharacters, unsigned requiredLength) 141 128 { 142 129 ASSERT(m_is8Bit); 143 130 ASSERT(requiredLength >= m_length); 144 145 131 // Copy the existing data into a new buffer, set result to point to the end of the existing data. 
146 132 auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16); … … 156 142 } 157 143 158 template<> void StringBuilder::reallocateBuffer<LChar>(unsigned requiredLength) 144 template <> 145 void StringBuilder::reallocateBuffer<LChar>(unsigned requiredLength) 159 146 { 160 147 // If the buffer has only one ref (by this StringBuilder), reallocate it, … … 172 159 } 173 160 174 template<> void StringBuilder::reallocateBuffer<UChar>(unsigned requiredLength) 161 template <> 162 void StringBuilder::reallocateBuffer<UChar>(unsigned requiredLength) 175 163 { 176 164 // If the buffer has only one ref (by this StringBuilder), reallocate it, … … 179 167 180 168 if (m_buffer->is8Bit()) 181 allocateBufferUp convert(m_buffer->characters8(), requiredLength);169 allocateBufferUpConvert(m_buffer->characters8(), requiredLength); 182 170 else if (m_buffer->hasOneRef()) 183 171 m_buffer = StringImpl::reallocate(m_buffer.releaseNonNull(), requiredLength, m_bufferCharacters16); … … 201 189 if (newCapacity > m_length) { 202 190 if (!m_length) { 203 LChar* nullPlaceholder = nullptr;191 LChar* nullPlaceholder = 0; 204 192 allocateBuffer(nullPlaceholder, newCapacity); 205 193 } else if (m_string.is8Bit()) … … 214 202 // Make 'length' additional capacity be available in m_buffer, update m_string & m_length, 215 203 // return a pointer to the newly allocated storage. 
216 template<typename CharacterType> ALWAYS_INLINE CharacterType* StringBuilder::appendUninitialized(unsigned length) 204 template <typename CharType> 205 ALWAYS_INLINE CharType* StringBuilder::appendUninitialized(unsigned length) 217 206 { 218 207 ASSERT(length); … … 229 218 m_string = String(); 230 219 m_length = requiredLength; 231 return bufferCharacters<CharacterType>() + currentLength;232 } 233 234 return appendUninitializedSlow<Char acterType>(requiredLength);220 return getBufferCharacters<CharType>() + currentLength; 221 } 222 223 return appendUninitializedSlow<CharType>(requiredLength); 235 224 } 236 225 237 226 // Make 'length' additional capacity be available in m_buffer, update m_string & m_length, 238 227 // return a pointer to the newly allocated storage. 239 template<typename CharacterType> CharacterType* StringBuilder::appendUninitializedSlow(unsigned requiredLength) 228 template <typename CharType> 229 CharType* StringBuilder::appendUninitializedSlow(unsigned requiredLength) 240 230 { 241 231 ASSERT(requiredLength); … … 244 234 // If the buffer is valid it must be at least as long as the current builder contents! 245 235 ASSERT(m_buffer->length() >= m_length); 246 reallocateBuffer<CharacterType>(expandedCapacity(capacity(), requiredLength)); 236 237 reallocateBuffer<CharType>(expandedCapacity(capacity(), requiredLength)); 247 238 } else { 248 239 ASSERT(m_string.length() == m_length); 249 allocateBuffer(m_length ? m_string.characters<Char acterType>() : nullptr, expandedCapacity(capacity(), requiredLength));250 } 251 252 auto* result = bufferCharacters<CharacterType>() + m_length;240 allocateBuffer(m_length ? 
m_string.characters<CharType>() : 0, expandedCapacity(capacity(), requiredLength)); 241 } 242 243 CharType* result = getBufferCharacters<CharType>() + m_length; 253 244 m_length = requiredLength; 254 245 ASSERT(m_buffer->length() >= m_length); … … 256 247 } 257 248 258 inline UChar* StringBuilder::appendUninitializedUpconvert(unsigned length)259 {260 unsigned requiredLength = length + m_length;261 if (requiredLength < length)262 CRASH();263 264 if (m_buffer) {265 // If the buffer is valid it must be at least as long as the current builder contents!266 ASSERT(m_buffer->length() >= m_length);267 allocateBufferUpconvert(m_buffer->characters8(), expandedCapacity(capacity(), requiredLength));268 } else {269 ASSERT(m_string.length() == m_length);270 allocateBufferUpconvert(m_string.isNull() ? nullptr : m_string.characters8(), expandedCapacity(capacity(), requiredLength));271 }272 273 auto* result = m_bufferCharacters16 + m_length;274 m_length = requiredLength;275 return result;276 }277 278 249 void StringBuilder::append(const UChar* characters, unsigned length) 279 250 { … … 284 255 285 256 if (m_is8Bit) { 286 if (length == 1 && !(*characters & ~0x FF)) {257 if (length == 1 && !(*characters & ~0xff)) { 287 258 // Append as 8 bit character 288 259 LChar lChar = static_cast<LChar>(*characters); … … 290 261 return; 291 262 } 292 memcpy(appendUninitializedUpconvert(length), characters, static_cast<size_t>(length) * sizeof(UChar)); 263 264 // Calculate the new size of the builder after appending. 265 unsigned requiredLength = length + m_length; 266 if (requiredLength < length) 267 CRASH(); 268 269 if (m_buffer) { 270 // If the buffer is valid it must be at least as long as the current builder contents! 271 ASSERT(m_buffer->length() >= m_length); 272 273 allocateBufferUpConvert(m_buffer->characters8(), expandedCapacity(capacity(), requiredLength)); 274 } else { 275 ASSERT(m_string.length() == m_length); 276 allocateBufferUpConvert(m_string.isNull() ? 
0 : m_string.characters8(), expandedCapacity(capacity(), requiredLength)); 277 } 278 279 memcpy(m_bufferCharacters16 + m_length, characters, static_cast<size_t>(length) * sizeof(UChar)); 280 m_length = requiredLength; 293 281 } else 294 282 memcpy(appendUninitialized<UChar>(length), characters, static_cast<size_t>(length) * sizeof(UChar)); 295 296 283 ASSERT(m_buffer->length() >= m_length); 297 284 } … … 304 291 305 292 if (m_is8Bit) { 306 auto* destination = appendUninitialized<LChar>(length); 307 // FIXME: How did we determine a threshold of 8 here was the right one? 308 // Also, this kind of optimization could be useful anywhere else we have a 309 // performance-sensitive code path that calls memcpy. 293 LChar* dest = appendUninitialized<LChar>(length); 310 294 if (length > 8) 311 memcpy(dest ination, characters, length);295 memcpy(dest, characters, static_cast<size_t>(length) * sizeof(LChar)); 312 296 else { 313 297 const LChar* end = characters + length; 314 298 while (characters < end) 315 * destination++ = *characters++;299 *(dest++) = *(characters++); 316 300 } 317 301 } else { 318 auto* destination= appendUninitialized<UChar>(length);302 UChar* dest = appendUninitialized<UChar>(length); 319 303 const LChar* end = characters + length; 320 304 while (characters < end) 321 * destination++ = *characters++;305 *(dest++) = *(characters++); 322 306 } 323 307 } … … 402 386 } 403 387 404 template<typename LengthType, typename CharacterType> static LengthType quotedJSONStringLength(const CharacterType* input, unsigned length) 405 { 406 LengthType quotedLength = 2; 407 for (unsigned i = 0; i < length; ++i) { 408 auto character = input[i]; 409 if (LIKELY(character > 0x1F)) { 410 switch (character) { 411 case '"': 412 case '\\': 413 quotedLength += 2; 414 break; 415 default: 416 ++quotedLength; 417 break; 418 } 419 } else { 420 switch (character) { 421 case '\t': 422 case '\r': 423 case '\n': 424 case '\f': 425 case '\b': 426 quotedLength += 2; 427 break; 428 default: 
429 quotedLength += 6; 430 } 431 } 432 } 433 return quotedLength; 434 } 435 436 template<typename CharacterType> static inline unsigned quotedJSONStringLength(const CharacterType* input, unsigned length) 437 { 438 constexpr auto maxSafeLength = (std::numeric_limits<unsigned>::max() - 2) / 6; 439 if (length <= maxSafeLength) 440 return quotedJSONStringLength<unsigned>(input, length); 441 return quotedJSONStringLength<Checked<unsigned>>(input, length).unsafeGet(); 442 } 443 444 template<typename OutputCharacterType, typename InputCharacterType> static inline void appendQuotedJSONStringInternal(OutputCharacterType* output, const InputCharacterType* input, unsigned length) 445 { 446 *output++ = '"'; 447 for (unsigned i = 0; i < length; ++i) { 448 auto character = input[i]; 449 if (LIKELY(character > 0x1F)) { 450 if (UNLIKELY(character == '"' || character == '\\')) 388 template <typename OutputCharacterType, typename InputCharacterType> 389 static void appendQuotedJSONStringInternal(OutputCharacterType*& output, const InputCharacterType* input, unsigned length) 390 { 391 for (const InputCharacterType* end = input + length; input != end; ++input) { 392 if (LIKELY(*input > 0x1F)) { 393 if (*input == '"' || *input == '\\') 451 394 *output++ = '\\'; 452 *output++ = character;395 *output++ = *input; 453 396 continue; 454 397 } 455 switch ( character) {398 switch (*input) { 456 399 case '\t': 457 400 *output++ = '\\'; … … 475 418 break; 476 419 default: 477 ASSERT(!(character & ~0xFF)); 420 ASSERT((*input & 0xFF00) == 0); 421 static const char hexDigits[] = "0123456789abcdef"; 478 422 *output++ = '\\'; 479 423 *output++ = 'u'; 480 424 *output++ = '0'; 481 425 *output++ = '0'; 482 *output++ = upperNibbleToLowercaseASCIIHexDigit(character); 483 *output++ = lowerNibbleToLowercaseASCIIHexDigit(character); 484 break; 485 } 486 } 487 *output = '"'; 488 } 489 490 void StringBuilder::appendQuotedJSONString(StringView string) 491 { 492 unsigned length = string.length(); 493 if 
(string.is8Bit()) { 494 auto* characters = string.characters8(); 495 if (m_is8Bit) 496 appendQuotedJSONStringInternal(appendUninitialized<LChar>(quotedJSONStringLength(characters, length)), characters, length); 426 *output++ = static_cast<LChar>(hexDigits[(*input >> 4) & 0xF]); 427 *output++ = static_cast<LChar>(hexDigits[*input & 0xF]); 428 break; 429 } 430 } 431 } 432 433 void StringBuilder::appendQuotedJSONString(const String& string) 434 { 435 // Make sure we have enough buffer space to append this string without having 436 // to worry about reallocating in the middle. 437 // The 2 is for the '"' quotes on each end. 438 // The 6 is for characters that need to be \uNNNN encoded. 439 Checked<unsigned> stringLength = string.length(); 440 Checked<unsigned> maximumCapacityRequired = length(); 441 maximumCapacityRequired += 2 + stringLength * 6; 442 unsigned allocationSize = maximumCapacityRequired.unsafeGet(); 443 // This max() is here to allow us to allocate sizes between the range [2^31, 2^32 - 2] because roundUpToPowerOfTwo(1<<31 + some int smaller than 1<<31) == 0. 
444 allocationSize = std::max(allocationSize, roundUpToPowerOfTwo(allocationSize)); 445 446 if (is8Bit() && !string.is8Bit()) 447 allocateBufferUpConvert(m_bufferCharacters8, allocationSize); 448 else 449 reserveCapacity(allocationSize); 450 ASSERT(m_buffer->length() >= allocationSize); 451 452 if (is8Bit()) { 453 ASSERT(string.is8Bit()); 454 LChar* output = m_bufferCharacters8 + m_length; 455 *output++ = '"'; 456 appendQuotedJSONStringInternal(output, string.characters8(), string.length()); 457 *output++ = '"'; 458 m_length = output - m_bufferCharacters8; 459 } else { 460 UChar* output = m_bufferCharacters16 + m_length; 461 *output++ = '"'; 462 if (string.is8Bit()) 463 appendQuotedJSONStringInternal(output, string.characters8(), string.length()); 497 464 else 498 appendQuotedJSONStringInternal(appendUninitialized<UChar>(quotedJSONStringLength(characters, length)), characters, length); 499 } else { 500 auto* characters = string.characters16(); 501 if (m_is8Bit) 502 appendQuotedJSONStringInternal(appendUninitializedUpconvert(quotedJSONStringLength(characters, length)), characters, length); 503 else 504 appendQuotedJSONStringInternal(appendUninitialized<UChar>(quotedJSONStringLength(characters, length)), characters, length); 505 } 465 appendQuotedJSONStringInternal(output, string.characters16(), string.length()); 466 *output++ = '"'; 467 m_length = output - m_bufferCharacters16; 468 } 469 ASSERT(m_buffer->length() >= m_length); 506 470 } 507 471 -
trunk/Source/WTF/wtf/text/StringBuilder.h
r209074 r209120 1 1 /* 2 * Copyright (C) 2009-201 6 Apple Inc. All rights reserved.2 * Copyright (C) 2009-2010, 2012-2013, 2016 Apple Inc. All rights reserved. 3 3 * Copyright (C) 2012 Google Inc. All rights reserved. 4 4 * … … 25 25 */ 26 26 27 #pragma once 28 27 #ifndef StringBuilder_h 28 #define StringBuilder_h 29 30 #include <wtf/text/AtomicString.h> 29 31 #include <wtf/text/StringView.h> 32 #include <wtf/text/WTFString.h> 30 33 31 34 namespace WTF { 32 35 33 36 class StringBuilder { 34 // Disallow copying since it's expensive and we don't want anyone to do it by accident.37 // Disallow copying since it's expensive and we don't want code to do it by accident. 35 38 WTF_MAKE_NONCOPYABLE(StringBuilder); 36 39 37 40 public: 38 StringBuilder() = default; 41 StringBuilder() 42 : m_length(0) 43 , m_is8Bit(true) 44 , m_bufferCharacters8(0) 45 { 46 } 39 47 40 48 WTF_EXPORT_PRIVATE void append(const UChar*, unsigned); … … 43 51 ALWAYS_INLINE void append(const char* characters, unsigned length) { append(reinterpret_cast<const LChar*>(characters), length); } 44 52 45 void append(const AtomicString& atomicString) { append(atomicString.string()); } 53 void append(const AtomicString& atomicString) 54 { 55 append(atomicString.string()); 56 } 46 57 47 58 void append(const String& string) 48 59 { 49 unsigned length = string.length(); 50 if (!length) 51 return; 52 53 // If we're appending to an empty string, and there is not a buffer 54 // (reserveCapacity has not been called) then just retain the string. 60 if (!string.length()) 61 return; 62 63 // If we're appending to an empty string, and there is not a buffer (reserveCapacity has not been called) 64 // then just retain the string. 
55 65 if (!m_length && !m_buffer) { 56 66 m_string = string; 57 m_length = length;58 m_is8Bit = string.is8Bit();67 m_length = string.length(); 68 m_is8Bit = m_string.is8Bit(); 59 69 return; 60 70 } 61 71 62 72 if (string.is8Bit()) 63 append(string.characters8(), length);73 append(string.characters8(), string.length()); 64 74 else 65 append(string.characters16(), length);75 append(string.characters16(), string.length()); 66 76 } 67 77 … … 71 81 return; 72 82 73 // If we're appending to an empty string, and there is not a buffer 74 // (reserveCapacity has not been called)then just retain the string.83 // If we're appending to an empty string, and there is not a buffer (reserveCapacity has not been called) 84 // then just retain the string. 75 85 if (!m_length && !m_buffer && !other.m_string.isNull()) { 76 86 m_string = other.m_string; 77 87 m_length = other.m_length; 78 m_is8Bit = other.m_is8Bit;79 88 return; 80 89 } … … 97 106 WTF_EXPORT_PRIVATE void append(CFStringRef); 98 107 #endif 99 100 108 #if USE(CF) && defined(__OBJC__) 101 109 void append(NSString *string) { append((__bridge CFStringRef)string); } … … 104 112 void append(const String& string, unsigned offset, unsigned length) 105 113 { 106 ASSERT(offset <= string.length()); 107 ASSERT(offset + length <= string.length()); 108 109 if (!length) 110 return; 111 112 // If we're appending to an empty string, and there is not a buffer 113 // (reserveCapacity has not been called) then just retain the string. 
114 if (!offset && !m_length && !m_buffer && length == string.length()) { 115 m_string = string; 116 m_length = length; 117 m_is8Bit = string.is8Bit(); 118 return; 119 } 114 if (!string.length()) 115 return; 116 117 if ((offset + length) > string.length()) 118 return; 120 119 121 120 if (string.is8Bit()) … … 131 130 } 132 131 133 void append(UChar c haracter)132 void append(UChar c) 134 133 { 135 134 if (m_buffer && m_length < m_buffer->length() && m_string.isNull()) { 136 135 if (!m_is8Bit) { 137 m_bufferCharacters16[m_length++] = c haracter;136 m_bufferCharacters16[m_length++] = c; 138 137 return; 139 138 } 140 if (!(character & ~0xFF)) { 141 m_bufferCharacters8[m_length++] = static_cast<LChar>(character); 139 140 if (!(c & ~0xff)) { 141 m_bufferCharacters8[m_length++] = static_cast<LChar>(c); 142 142 return; 143 143 } 144 144 } 145 append(&c haracter, 1);146 } 147 148 void append(LChar c haracter)145 append(&c, 1); 146 } 147 148 void append(LChar c) 149 149 { 150 150 if (m_buffer && m_length < m_buffer->length() && m_string.isNull()) { 151 151 if (m_is8Bit) 152 m_bufferCharacters8[m_length++] = c haracter;152 m_bufferCharacters8[m_length++] = c; 153 153 else 154 m_bufferCharacters16[m_length++] = c haracter;154 m_bufferCharacters16[m_length++] = c; 155 155 } else 156 append(&character, 1); 157 } 158 159 void append(char character) { append(static_cast<LChar>(character)); } 156 append(&c, 1); 157 } 158 159 void append(char c) 160 { 161 append(static_cast<LChar>(c)); 162 } 160 163 161 164 void append(UChar32 c) … … 169 172 } 170 173 171 WTF_EXPORT_PRIVATE void appendQuotedJSONString(StringView); 172 173 template<unsigned charactersCount> ALWAYS_INLINE void appendLiteral(const char (&characters)[charactersCount]) { append(characters, charactersCount - 1); } 174 WTF_EXPORT_PRIVATE void appendQuotedJSONString(const String&); 175 176 template<unsigned charactersCount> 177 ALWAYS_INLINE void appendLiteral(const char (&characters)[charactersCount]) { append(characters, 
charactersCount - 1); } 174 178 175 179 WTF_EXPORT_PRIVATE void appendNumber(int); … … 217 221 } 218 222 219 unsigned length() const { return m_length; } 223 unsigned length() const 224 { 225 return m_length; 226 } 227 220 228 bool isEmpty() const { return !m_length; } 221 229 222 230 WTF_EXPORT_PRIVATE void reserveCapacity(unsigned newCapacity); 223 231 224 unsigned capacity() const { return m_buffer ? m_buffer->length() : m_length; } 232 unsigned capacity() const 233 { 234 return m_buffer ? m_buffer->length() : m_length; 235 } 225 236 226 237 WTF_EXPORT_PRIVATE void resize(unsigned newSize); 238 227 239 WTF_EXPORT_PRIVATE bool canShrink() const; 240 228 241 WTF_EXPORT_PRIVATE void shrinkToFit(); 229 242 … … 282 295 void allocateBuffer(const LChar* currentCharacters, unsigned requiredLength); 283 296 void allocateBuffer(const UChar* currentCharacters, unsigned requiredLength); 284 void allocateBufferUpconvert(const LChar* currentCharacters, unsigned requiredLength); 285 template<typename CharacterType> void reallocateBuffer(unsigned requiredLength); 286 UChar* appendUninitializedUpconvert(unsigned length); 287 template<typename CharacterType> CharacterType* appendUninitialized(unsigned length); 288 template<typename CharacterType> CharacterType* appendUninitializedSlow(unsigned length); 289 template<typename CharacterType> CharacterType* bufferCharacters(); 297 void allocateBufferUpConvert(const LChar* currentCharacters, unsigned requiredLength); 298 template <typename CharType> 299 void reallocateBuffer(unsigned requiredLength); 300 template <typename CharType> 301 ALWAYS_INLINE CharType* appendUninitialized(unsigned length); 302 template <typename CharType> 303 CharType* appendUninitializedSlow(unsigned length); 304 template <typename CharType> 305 ALWAYS_INLINE CharType * getBufferCharacters(); 290 306 WTF_EXPORT_PRIVATE void reifyString() const; 291 307 292 unsigned m_length { 0 };308 unsigned m_length; 293 309 mutable String m_string; 294 310 
RefPtr<StringImpl> m_buffer; 295 bool m_is8Bit { true };311 bool m_is8Bit; 296 312 union { 297 LChar* m_bufferCharacters8 { nullptr };313 LChar* m_bufferCharacters8; 298 314 UChar* m_bufferCharacters16; 299 315 }; 300 316 }; 301 317 302 template<typename StringType> bool equal(const StringBuilder&, const StringType&); 303 bool equal(const StringBuilder&, const String&); // Only needed because is8Bit dereferences nullptr when the string is null. 304 template<typename CharacterType> bool equal(const StringBuilder&, const CharacterType*, unsigned length); 305 306 bool operator==(const StringBuilder&, const StringBuilder&); 307 bool operator!=(const StringBuilder&, const StringBuilder&); 308 bool operator==(const StringBuilder&, const String&); 309 bool operator!=(const StringBuilder&, const String&); 310 bool operator==(const String&, const StringBuilder&); 311 bool operator!=(const String&, const StringBuilder&); 312 313 template<typename CharacterType> inline bool equal(const StringBuilder& s, const CharacterType* buffer, unsigned length) 318 template <> 319 ALWAYS_INLINE LChar* StringBuilder::getBufferCharacters<LChar>() 320 { 321 ASSERT(m_is8Bit); 322 return m_bufferCharacters8; 323 } 324 325 template <> 326 ALWAYS_INLINE UChar* StringBuilder::getBufferCharacters<UChar>() 327 { 328 ASSERT(!m_is8Bit); 329 return m_bufferCharacters16; 330 } 331 332 template <typename CharType> 333 bool equal(const StringBuilder& s, const CharType* buffer, unsigned length) 314 334 { 315 335 if (s.length() != length) … … 322 342 } 323 343 324 template<typename StringType> inline bool equal(const StringBuilder& a, const StringType& b) 344 template <typename StringType> 345 bool equal(const StringBuilder& a, const StringType& b) 325 346 { 326 return equalCommon(a, b); 327 } 328 329 inline bool equal(const StringBuilder& a, const String& b) 330 { 331 return !b.isNull() && equalCommon(a, b); 347 if (a.length() != b.length()) 348 return false; 349 350 if (!a.length()) 351 return true; 352 
353 if (a.is8Bit()) { 354 if (b.is8Bit()) 355 return equal(a.characters8(), b.characters8(), a.length()); 356 return equal(a.characters8(), b.characters16(), a.length()); 357 } 358 359 if (b.is8Bit()) 360 return equal(a.characters16(), b.characters8(), a.length()); 361 return equal(a.characters16(), b.characters16(), a.length()); 332 362 } 333 363 … … 342 372 343 373 using WTF::StringBuilder; 374 375 #endif // StringBuilder_h -
trunk/Source/WebCore/ChangeLog
r209118 r209120

2016-11-29 Commit Queue <commit-queue@webkit.org>

    Unreviewed, rolling out r209058 and r209074.
    https://bugs.webkit.org/show_bug.cgi?id=165188

    These changes caused API test StringBuilderTest.Equal to crash
    and/or fail. (Requested by ryanhaddad on #webkit).

    Reverted changesets:

    "Streamline and speed up tokenizer and segmented string
    classes"
    https://bugs.webkit.org/show_bug.cgi?id=165003
    http://trac.webkit.org/changeset/209058

    "REGRESSION (r209058): API test StringBuilderTest.Equal
    crashing"
    https://bugs.webkit.org/show_bug.cgi?id=165142
    http://trac.webkit.org/changeset/209074

2016-11-29 Nan Wang <n_wang@apple.com>

-
trunk/Source/WebCore/bindings/js/JSHTMLDocumentCustom.cpp
r209058 r209120 1 1 /* 2 * Copyright (C) 2007-20 16 Apple Inc. All rights reserved.2 * Copyright (C) 2007-2009, 2016 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 27 27 #include "JSHTMLDocument.h" 28 28 29 #include "Frame.h" 30 #include "HTMLCollection.h" 31 #include "HTMLDocument.h" 32 #include "HTMLElement.h" 29 33 #include "HTMLIFrameElement.h" 34 #include "HTMLNames.h" 35 #include "JSDOMWindow.h" 30 36 #include "JSDOMWindowCustom.h" 37 #include "JSDOMWindowShell.h" 38 #include "JSDocumentCustom.h" 31 39 #include "JSHTMLCollection.h" 40 #include "JSMainThreadExecState.h" 32 41 #include "SegmentedString.h" 42 #include "DocumentParser.h" 43 #include <interpreter/StackVisitor.h> 44 #include <runtime/Error.h> 45 #include <runtime/JSCell.h> 46 #include <wtf/unicode/CharacterNames.h> 33 47 34 48 using namespace JSC; … … 41 55 { 42 56 auto& document = passedDocument.get(); 43 auto* wrapper = createWrapper<HTMLDocument>(globalObject, WTFMove(passedDocument)); 57 JSObject* wrapper = createWrapper<HTMLDocument>(globalObject, WTFMove(passedDocument)); 58 44 59 reportMemoryForDocumentIfFrameless(*state, document); 60 45 61 return wrapper; 46 62 } … … 53 69 } 54 70 55 bool JSHTMLDocument::getOwnPropertySlot(JSObject* object, ExecState* state, PropertyName propertyName, PropertySlot& slot)56 { 57 auto& thisObject = *jsCast<JSHTMLDocument*>(object);58 ASSERT_GC_OBJECT_INHERITS( (&thisObject), info());71 bool JSHTMLDocument::getOwnPropertySlot(JSObject* object, ExecState* exec, PropertyName propertyName, PropertySlot& slot) 72 { 73 JSHTMLDocument* thisObject = jsCast<JSHTMLDocument*>(object); 74 ASSERT_GC_OBJECT_INHERITS(thisObject, info()); 59 75 60 76 if (propertyName == "open") { 61 if (Base::getOwnPropertySlot( &thisObject, state, propertyName, slot))77 if (Base::getOwnPropertySlot(thisObject, exec, propertyName, slot)) 62 78 return true; 63 slot.setCustom(&thisObject, ReadOnly | DontDelete | DontEnum, 
nonCachingStaticFunctionGetter<jsHTMLDocumentPrototypeFunctionOpen, 2>); 79 80 slot.setCustom(thisObject, ReadOnly | DontDelete | DontEnum, nonCachingStaticFunctionGetter<jsHTMLDocumentPrototypeFunctionOpen, 2>); 64 81 return true; 65 82 } 66 83 67 84 JSValue value; 68 if (thisObject .nameGetter(state, propertyName, value)) {69 slot.setValue( &thisObject, ReadOnly | DontDelete | DontEnum, value);85 if (thisObject->nameGetter(exec, propertyName, value)) { 86 slot.setValue(thisObject, ReadOnly | DontDelete | DontEnum, value); 70 87 return true; 71 88 } 72 89 73 return Base::getOwnPropertySlot( &thisObject, state, propertyName, slot);74 } 75 76 bool JSHTMLDocument::nameGetter(ExecState* state, PropertyName propertyName, JSValue& value)90 return Base::getOwnPropertySlot(thisObject, exec, propertyName, slot); 91 } 92 93 bool JSHTMLDocument::nameGetter(ExecState* exec, PropertyName propertyName, JSValue& value) 77 94 { 78 95 auto& document = wrapped(); 79 96 80 auto* atomicPropertyName = propertyName.publicName();97 AtomicStringImpl* atomicPropertyName = propertyName.publicName(); 81 98 if (!atomicPropertyName || !document.hasDocumentNamedItem(*atomicPropertyName)) 82 99 return false; 83 100 84 101 if (UNLIKELY(document.documentNamedItemContainsMultipleElements(*atomicPropertyName))) { 85 autocollection = document.documentNamedItems(atomicPropertyName);102 Ref<HTMLCollection> collection = document.documentNamedItems(atomicPropertyName); 86 103 ASSERT(collection->length() > 1); 87 value = toJS( state, globalObject(), collection);104 value = toJS(exec, globalObject(), collection); 88 105 return true; 89 106 } 90 107 91 auto& element = *document.documentNamedItem(*atomicPropertyName);108 Element& element = *document.documentNamedItem(*atomicPropertyName); 92 109 if (UNLIKELY(is<HTMLIFrameElement>(element))) { 93 if ( auto* frame = downcast<HTMLIFrameElement>(element).contentFrame()) {94 value = toJS( state, frame);110 if (Frame* frame = 
downcast<HTMLIFrameElement>(element).contentFrame()) { 111 value = toJS(exec, frame); 95 112 return true; 96 113 } 97 114 } 98 115 99 value = toJS( state, globalObject(), element);116 value = toJS(exec, globalObject(), element); 100 117 return true; 101 118 } … … 106 123 { 107 124 // If "all" has been overwritten, return the overwritten value 108 if (auto overwrittenValue = getDirect(state.vm(), Identifier::fromString(&state, "all"))) 109 return overwrittenValue; 125 JSValue v = getDirect(state.vm(), Identifier::fromString(&state, "all")); 126 if (v) 127 return v; 110 128 111 129 return toJS(&state, globalObject(), wrapped().all()); … … 118 136 } 119 137 120 static inlineDocument* findCallingDocument(ExecState& state)138 static Document* findCallingDocument(ExecState& state) 121 139 { 122 140 CallerFunctor functor; 123 141 state.iterate(functor); 124 auto* callerFrame = functor.callerFrame();142 CallFrame* callerFrame = functor.callerFrame(); 125 143 if (!callerFrame) 126 144 return nullptr; 127 return asJSDOMWindow(callerFrame->lexicalGlobalObject())->wrapped().document(); 145 146 return asJSDOMWindow(functor.callerFrame()->lexicalGlobalObject())->wrapped().document(); 128 147 } 129 148 … … 137 156 // For compatibility with other browsers, pass open calls with more than 2 parameters to the window. 
138 157 if (state.argumentCount() > 2) { 139 if (auto* frame = wrapped().frame()) { 140 if (auto* wrapper = toJSDOMWindowShell(frame, currentWorld(&state))) { 141 auto function = wrapper->get(&state, Identifier::fromString(&state, "open")); 158 if (Frame* frame = wrapped().frame()) { 159 JSDOMWindowShell* wrapper = toJSDOMWindowShell(frame, currentWorld(&state)); 160 if (wrapper) { 161 JSValue function = wrapper->get(&state, Identifier::fromString(&state, "open")); 142 162 CallData callData; 143 autocallType = ::getCallData(function, callData);163 CallType callType = ::getCallData(function, callData); 144 164 if (callType == CallType::None) 145 165 return throwTypeError(&state, scope); … … 150 170 } 151 171 152 // Calling document.open clobbers the security context of the document and aliases it with the active security context. 153 // FIXME: Is it correct that this does not use findCallingDocument as the write function below does? 154 wrapped().open(asJSDOMWindow(state.lexicalGlobalObject())->wrapped().document()); 155 // FIXME: Why do we return the document instead of returning undefined? 172 // document.open clobbers the security context of the document and 173 // aliases it with the active security context. 174 Document* activeDocument = asJSDOMWindow(state.lexicalGlobalObject())->wrapped().document(); 175 176 // In the case of two parameters or fewer, do a normal document open. 
177 wrapped().open(activeDocument); 156 178 return this; 157 179 } … … 159 181 enum NewlineRequirement { DoNotAddNewline, DoAddNewline }; 160 182 161 static inline JSValue documentWrite(ExecState& state, JSHTMLDocument& document, NewlineRequirement addNewline) 162 { 163 VM& vm = state.vm(); 164 auto scope = DECLARE_THROW_SCOPE(vm); 165 166 SegmentedString segmentedString; 167 size_t argumentCount = state.argumentCount(); 168 for (size_t i = 0; i < argumentCount; ++i) { 169 segmentedString.append(state.uncheckedArgument(i).toWTFString(&state)); 170 RETURN_IF_EXCEPTION(scope, { }); 183 static inline void documentWrite(ExecState& state, JSHTMLDocument* thisDocument, NewlineRequirement addNewline) 184 { 185 HTMLDocument* document = &thisDocument->wrapped(); 186 // DOM only specifies single string argument, but browsers allow multiple or no arguments. 187 188 size_t size = state.argumentCount(); 189 190 String firstString = state.argument(0).toString(&state)->value(&state); 191 SegmentedString segmentedString = firstString; 192 if (size != 1) { 193 if (!size) 194 segmentedString.clear(); 195 else { 196 for (size_t i = 1; i < size; ++i) { 197 String subsequentString = state.uncheckedArgument(i).toString(&state)->value(&state); 198 segmentedString.append(SegmentedString(subsequentString)); 199 } 200 } 171 201 } 172 202 if (addNewline) 173 segmentedString.append(String { "\n" }); 174 175 document.wrapped().write(WTFMove(segmentedString), findCallingDocument(state)); 203 segmentedString.append(SegmentedString(String(&newlineCharacter, 1))); 204 205 Document* activeDocument = findCallingDocument(state); 206 document->write(segmentedString, activeDocument); 207 } 208 209 JSValue JSHTMLDocument::write(ExecState& state) 210 { 211 documentWrite(state, this, DoNotAddNewline); 176 212 return jsUndefined(); 177 213 } 178 214 179 JSValue JSHTMLDocument::write(ExecState& state)180 {181 return documentWrite(state, *this, DoNotAddNewline);182 }183 184 215 JSValue 
JSHTMLDocument::writeln(ExecState& state) 185 216 { 186 return documentWrite(state, *this, DoAddNewline); 217 documentWrite(state, this, DoAddNewline); 218 return jsUndefined(); 187 219 } 188 220 -
trunk/Source/WebCore/css/parser/CSSTokenizer.cpp
r209058 r209120 36 36 #include "CSSTokenizerInputStream.h" 37 37 #include "HTMLParserIdioms.h" 38 #include <wtf/text/StringBuilder.h>39 38 #include <wtf/unicode/CharacterNames.h> 40 39 -
trunk/Source/WebCore/css/parser/CSSTokenizer.h
r209058 r209120 31 31 32 32 #include "CSSParserToken.h" 33 #include "InputStreamPreprocessor.h" 33 34 #include <climits> 34 35 #include <wtf/text/StringView.h> -
trunk/Source/WebCore/css/parser/CSSTokenizerInputStream.h
r209058 r209120 31 31 32 32 #include <wtf/text/StringView.h> 33 #include <wtf/text/WTFString.h> 33 34 34 35 namespace WebCore { 35 36 constexpr LChar kEndOfFileMarker = 0;37 36 38 37 class CSSTokenizerInputStream { -
trunk/Source/WebCore/dom/Document.cpp
r209058 r209120 2792 2792 } 2793 2793 2794 void Document::write( SegmentedString&& text, Document* ownerDocument)2794 void Document::write(const SegmentedString& text, Document* ownerDocument) 2795 2795 { 2796 2796 NestingLevelIncrementer nestingLevelIncrementer(m_writeRecursionDepth); … … 2800 2800 2801 2801 if (m_writeRecursionIsTooDeep) 2802 2802 return; 2803 2803 2804 2804 bool hasInsertionPoint = m_parser && m_parser->hasInsertionPoint(); … … 2810 2810 2811 2811 ASSERT(m_parser); 2812 m_parser->insert( WTFMove(text));2812 m_parser->insert(text); 2813 2813 } 2814 2814 2815 2815 void Document::write(const String& text, Document* ownerDocument) 2816 2816 { 2817 write(SegmentedString { text }, ownerDocument);2817 write(SegmentedString(text), ownerDocument); 2818 2818 } 2819 2819 2820 2820 void Document::writeln(const String& text, Document* ownerDocument) 2821 2821 { 2822 SegmentedString textWithNewline { text }; 2823 textWithNewline.append(String { "\n" }); 2824 write(WTFMove(textWithNewline), ownerDocument); 2822 write(text, ownerDocument); 2823 write("\n", ownerDocument); 2825 2824 } 2826 2825 -
trunk/Source/WebCore/dom/Document.h
r209058 r209120 603 603 void cancelParsing(); 604 604 605 void write( SegmentedString&& text, Document* ownerDocument = nullptr);605 void write(const SegmentedString& text, Document* ownerDocument = nullptr); 606 606 WEBCORE_EXPORT void write(const String& text, Document* ownerDocument = nullptr); 607 607 WEBCORE_EXPORT void writeln(const String& text, Document* ownerDocument = nullptr); -
trunk/Source/WebCore/dom/DocumentParser.h
r209058 r209120 44 44 45 45 // insert is used by document.write. 46 virtual void insert( SegmentedString&&) = 0;46 virtual void insert(const SegmentedString&) = 0; 47 47 48 48 // appendBytes and flush are used by DocumentWriter (the loader). -
trunk/Source/WebCore/dom/RawDataDocumentParser.h
r209058 r209120 50 50 } 51 51 52 void insert( SegmentedString&&) override52 void insert(const SegmentedString&) override 53 53 { 54 54 // <https://bugs.webkit.org/show_bug.cgi?id=25397>: JS code can always call document.write, we need to handle it. -
trunk/Source/WebCore/html/FTPDirectoryDocument.cpp
r209058 r209120 345 345 void FTPDirectoryDocumentParser::append(RefPtr<StringImpl>&& inputSource) 346 346 { 347 String source(WTFMove(inputSource)); 348 347 349 // Make sure we have the table element to append to by loading the template set in the pref, or 348 350 // creating a very basic document with the appropriate table … … 356 358 357 359 m_dest = m_buffer; 358 SegmentedString str ing { String { WTFMove(inputSource) } };359 while (!str ing.isEmpty()) {360 UChar c = str ing.currentCharacter();360 SegmentedString str = source; 361 while (!str.isEmpty()) { 362 UChar c = str.currentChar(); 361 363 362 364 if (c == '\r') { … … 375 377 } 376 378 377 str ing.advance();379 str.advance(); 378 380 379 381 // Maybe enlarge the buffer -
trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp
r209058 r209120 329 329 } 330 330 331 void HTMLDocumentParser::insert( SegmentedString&& source)331 void HTMLDocumentParser::insert(const SegmentedString& source) 332 332 { 333 333 if (isStopped()) … … 338 338 Ref<HTMLDocumentParser> protectedThis(*this); 339 339 340 source.setExcludeLineNumbers(); 341 m_input.insertAtCurrentInsertionPoint(WTFMove(source)); 340 SegmentedString excludedLineNumberSource(source); 341 excludedLineNumberSource.setExcludeLineNumbers(); 342 m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource); 342 343 pumpTokenizerIfPossible(ForceSynchronous); 343 344 … … 363 364 Ref<HTMLDocumentParser> protectedThis(*this); 364 365 365 String source { WTFMove(inputSource) };366 String source(WTFMove(inputSource)); 366 367 367 368 if (m_preloadScanner) { -
trunk/Source/WebCore/html/parser/HTMLDocumentParser.h
r209058 r209120 66 66 explicit HTMLDocumentParser(HTMLDocument&); 67 67 68 void insert( SegmentedString&&) final;68 void insert(const SegmentedString&) final; 69 69 void append(RefPtr<StringImpl>&&) override; 70 70 void finish() override; -
trunk/Source/WebCore/html/parser/HTMLEntityParser.cpp
r209058 r209120 62 62 HTMLEntitySearch entitySearch; 63 63 while (!source.isEmpty()) { 64 cc = source.currentChar acter();64 cc = source.currentChar(); 65 65 entitySearch.advance(cc); 66 66 if (!entitySearch.isEntityPrefix()) 67 67 break; 68 68 consumedCharacters.append(cc); 69 source.advance PastNonNewline();69 source.advance(); 70 70 } 71 71 notEnoughCharacters = source.isEmpty(); … … 89 89 const LChar* reference = entitySearch.mostRecentMatch()->entity; 90 90 for (int i = 0; i < length; ++i) { 91 cc = source.currentChar acter();91 cc = source.currentChar(); 92 92 ASSERT_UNUSED(reference, cc == *reference++); 93 93 consumedCharacters.append(cc); 94 source.advance PastNonNewline();94 source.advance(); 95 95 ASSERT(!source.isEmpty()); 96 96 } 97 cc = source.currentChar acter();97 cc = source.currentChar(); 98 98 } 99 99 if (entitySearch.mostRecentMatch()->lastCharacter() == ';' -
trunk/Source/WebCore/html/parser/HTMLInputStream.h
r209058 r209120 26 26 #pragma once 27 27 28 #include "InputStreamPreprocessor.h" 28 29 #include "SegmentedString.h" 29 30 #include <wtf/text/TextPosition.h> … … 56 57 } 57 58 58 void appendToEnd( SegmentedString&& string)59 void appendToEnd(const SegmentedString& string) 59 60 { 60 m_last->append( WTFMove(string));61 m_last->append(string); 61 62 } 62 63 63 void insertAtCurrentInsertionPoint( SegmentedString&& string)64 void insertAtCurrentInsertionPoint(const SegmentedString& string) 64 65 { 65 m_first.append( WTFMove(string));66 m_first.append(string); 66 67 } 67 68 … … 73 74 void markEndOfFile() 74 75 { 75 m_last->append(S tring { &kEndOfFileMarker, 1 });76 m_last->append(SegmentedString(String(&kEndOfFileMarker, 1))); 76 77 m_last->close(); 77 78 } … … 92 93 void splitInto(SegmentedString& next) 93 94 { 94 next = WTFMove(m_first); 95 next = m_first; 96 m_first = SegmentedString(); 95 97 if (m_last == &m_first) { 96 98 // We used to only have one SegmentedString in the InputStream -
trunk/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp
r209058 r209120 1 1 /* 2 2 * Copyright (C) 2010 Google Inc. All Rights Reserved. 3 * Copyright (C) 2015 -2016Apple Inc. All Rights Reserved.3 * Copyright (C) 2015 Apple Inc. All Rights Reserved. 4 4 * 5 5 * Redistribution and use in source and binary forms, with or without … … 152 152 // least bytesToCheckUnconditionally bytes of input. 153 153 154 constexprint bytesToCheckUnconditionally = 1024;154 static const int bytesToCheckUnconditionally = 1024; 155 155 156 m_input.append( m_codec->decode(data, length));156 m_input.append(SegmentedString(m_codec->decode(data, length))); 157 157 158 158 while (auto token = m_tokenizer.nextToken(m_input)) { -
trunk/Source/WebCore/html/parser/HTMLSourceTracker.cpp
r209058 r209120 32 32 33 33 namespace WebCore { 34 35 HTMLSourceTracker::HTMLSourceTracker() 36 { 37 } 34 38 35 39 void HTMLSourceTracker::startToken(SegmentedString& currentInput, HTMLTokenizer& tokenizer) … … 75 79 unsigned i = 0; 76 80 for ( ; i < length && !m_previousSource.isEmpty(); ++i) { 77 source.append(m_previousSource.currentChar acter());81 source.append(m_previousSource.currentChar()); 78 82 m_previousSource.advance(); 79 83 } 80 84 for ( ; i < length; ++i) { 81 85 ASSERT(!m_currentSource.isEmpty()); 82 source.append(m_currentSource.currentChar acter());86 source.append(m_currentSource.currentChar()); 83 87 m_currentSource.advance(); 84 88 } -
trunk/Source/WebCore/html/parser/HTMLSourceTracker.h
r209058 r209120 37 37 WTF_MAKE_NONCOPYABLE(HTMLSourceTracker); 38 38 public: 39 HTMLSourceTracker() = default;39 HTMLSourceTracker(); 40 40 41 41 void startToken(SegmentedString&, HTMLTokenizer&); -
trunk/Source/WebCore/html/parser/HTMLTokenizer.cpp
r209058 r209120 1 1 /* 2 * Copyright (C) 2008 -2016Apple Inc. All Rights Reserved.2 * Copyright (C) 2008, 2015 Apple Inc. All Rights Reserved. 3 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ 4 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. … … 32 32 #include "HTMLNames.h" 33 33 #include "MarkupTokenizerInlines.h" 34 #include <wtf/ text/StringBuilder.h>34 #include <wtf/ASCIICType.h> 35 35 36 36 using namespace WTF; … … 97 97 saveEndTagNameIfNeeded(); 98 98 m_state = DataState; 99 source.advance PastNonNewline();99 source.advanceAndUpdateLineNumber(); 100 100 return true; 101 101 } … … 158 158 bool HTMLTokenizer::commitToPartialEndTag(SegmentedString& source, UChar character, State state) 159 159 { 160 ASSERT(source.currentChar acter() == character);160 ASSERT(source.currentChar() == character); 161 161 appendToTemporaryBuffer(character); 162 source.advance PastNonNewline();162 source.advanceAndUpdateLineNumber(); 163 163 164 164 if (haveBufferedCharacterToken()) { … … 175 175 bool HTMLTokenizer::commitToCompleteEndTag(SegmentedString& source) 176 176 { 177 ASSERT(source.currentChar acter() == '>');177 ASSERT(source.currentChar() == '>'); 178 178 appendToTemporaryBuffer('>'); 179 source.advance PastNonNewline();179 source.advance(); 180 180 181 181 m_state = DataState; … … 213 213 BEGIN_STATE(DataState) 214 214 if (character == '&') 215 ADVANCE_ PAST_NON_NEWLINE_TO(CharacterReferenceInDataState);215 ADVANCE_TO(CharacterReferenceInDataState); 216 216 if (character == '<') { 217 217 if (haveBufferedCharacterToken()) 218 218 RETURN_IN_CURRENT_STATE(true); 219 ADVANCE_ PAST_NON_NEWLINE_TO(TagOpenState);219 ADVANCE_TO(TagOpenState); 220 220 } 221 221 if (character == kEndOfFileMarker) … … 233 233 BEGIN_STATE(RCDATAState) 234 234 if (character == '&') 235 ADVANCE_ PAST_NON_NEWLINE_TO(CharacterReferenceInRCDATAState);235 ADVANCE_TO(CharacterReferenceInRCDATAState); 236 236 if (character == '<') 237 ADVANCE_ 
PAST_NON_NEWLINE_TO(RCDATALessThanSignState);237 ADVANCE_TO(RCDATALessThanSignState); 238 238 if (character == kEndOfFileMarker) 239 239 RECONSUME_IN(DataState); … … 250 250 BEGIN_STATE(RAWTEXTState) 251 251 if (character == '<') 252 ADVANCE_ PAST_NON_NEWLINE_TO(RAWTEXTLessThanSignState);252 ADVANCE_TO(RAWTEXTLessThanSignState); 253 253 if (character == kEndOfFileMarker) 254 254 RECONSUME_IN(DataState); … … 259 259 BEGIN_STATE(ScriptDataState) 260 260 if (character == '<') 261 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataLessThanSignState);261 ADVANCE_TO(ScriptDataLessThanSignState); 262 262 if (character == kEndOfFileMarker) 263 263 RECONSUME_IN(DataState); … … 275 275 BEGIN_STATE(TagOpenState) 276 276 if (character == '!') 277 ADVANCE_ PAST_NON_NEWLINE_TO(MarkupDeclarationOpenState);277 ADVANCE_TO(MarkupDeclarationOpenState); 278 278 if (character == '/') 279 ADVANCE_ PAST_NON_NEWLINE_TO(EndTagOpenState);279 ADVANCE_TO(EndTagOpenState); 280 280 if (isASCIIAlpha(character)) { 281 281 m_token.beginStartTag(convertASCIIAlphaToLower(character)); 282 ADVANCE_ PAST_NON_NEWLINE_TO(TagNameState);282 ADVANCE_TO(TagNameState); 283 283 } 284 284 if (character == '?') { … … 298 298 m_token.beginEndTag(convertASCIIAlphaToLower(character)); 299 299 m_appropriateEndTagName.clear(); 300 ADVANCE_ PAST_NON_NEWLINE_TO(TagNameState);301 } 302 if (character == '>') { 303 parseError(); 304 ADVANCE_ PAST_NON_NEWLINE_TO(DataState);300 ADVANCE_TO(TagNameState); 301 } 302 if (character == '>') { 303 parseError(); 304 ADVANCE_TO(DataState); 305 305 } 306 306 if (character == kEndOfFileMarker) { … … 318 318 ADVANCE_TO(BeforeAttributeNameState); 319 319 if (character == '/') 320 ADVANCE_ PAST_NON_NEWLINE_TO(SelfClosingStartTagState);320 ADVANCE_TO(SelfClosingStartTagState); 321 321 if (character == '>') 322 322 return emitAndResumeInDataState(source); … … 328 328 } 329 329 m_token.appendToName(toASCIILower(character)); 330 ADVANCE_ PAST_NON_NEWLINE_TO(TagNameState);330 ADVANCE_TO(TagNameState); 
331 331 END_STATE() 332 332 … … 335 335 m_temporaryBuffer.clear(); 336 336 ASSERT(m_bufferedEndTagName.isEmpty()); 337 ADVANCE_ PAST_NON_NEWLINE_TO(RCDATAEndTagOpenState);337 ADVANCE_TO(RCDATAEndTagOpenState); 338 338 } 339 339 bufferASCIICharacter('<'); … … 345 345 appendToTemporaryBuffer(character); 346 346 appendToPossibleEndTag(convertASCIIAlphaToLower(character)); 347 ADVANCE_ PAST_NON_NEWLINE_TO(RCDATAEndTagNameState);347 ADVANCE_TO(RCDATAEndTagNameState); 348 348 } 349 349 bufferASCIICharacter('<'); … … 356 356 appendToTemporaryBuffer(character); 357 357 appendToPossibleEndTag(convertASCIIAlphaToLower(character)); 358 ADVANCE_ PAST_NON_NEWLINE_TO(RCDATAEndTagNameState);358 ADVANCE_TO(RCDATAEndTagNameState); 359 359 } 360 360 if (isTokenizerWhitespace(character)) { … … 386 386 m_temporaryBuffer.clear(); 387 387 ASSERT(m_bufferedEndTagName.isEmpty()); 388 ADVANCE_ PAST_NON_NEWLINE_TO(RAWTEXTEndTagOpenState);388 ADVANCE_TO(RAWTEXTEndTagOpenState); 389 389 } 390 390 bufferASCIICharacter('<'); … … 396 396 appendToTemporaryBuffer(character); 397 397 appendToPossibleEndTag(convertASCIIAlphaToLower(character)); 398 ADVANCE_ PAST_NON_NEWLINE_TO(RAWTEXTEndTagNameState);398 ADVANCE_TO(RAWTEXTEndTagNameState); 399 399 } 400 400 bufferASCIICharacter('<'); … … 407 407 appendToTemporaryBuffer(character); 408 408 appendToPossibleEndTag(convertASCIIAlphaToLower(character)); 409 ADVANCE_ PAST_NON_NEWLINE_TO(RAWTEXTEndTagNameState);409 ADVANCE_TO(RAWTEXTEndTagNameState); 410 410 } 411 411 if (isTokenizerWhitespace(character)) { … … 437 437 m_temporaryBuffer.clear(); 438 438 ASSERT(m_bufferedEndTagName.isEmpty()); 439 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEndTagOpenState);439 ADVANCE_TO(ScriptDataEndTagOpenState); 440 440 } 441 441 if (character == '!') { 442 442 bufferASCIICharacter('<'); 443 443 bufferASCIICharacter('!'); 444 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEscapeStartState);444 ADVANCE_TO(ScriptDataEscapeStartState); 445 445 } 446 446 bufferASCIICharacter('<'); … … 
452 452 appendToTemporaryBuffer(character); 453 453 appendToPossibleEndTag(convertASCIIAlphaToLower(character)); 454 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEndTagNameState);454 ADVANCE_TO(ScriptDataEndTagNameState); 455 455 } 456 456 bufferASCIICharacter('<'); … … 463 463 appendToTemporaryBuffer(character); 464 464 appendToPossibleEndTag(convertASCIIAlphaToLower(character)); 465 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEndTagNameState);465 ADVANCE_TO(ScriptDataEndTagNameState); 466 466 } 467 467 if (isTokenizerWhitespace(character)) { … … 492 492 if (character == '-') { 493 493 bufferASCIICharacter('-'); 494 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEscapeStartDashState);494 ADVANCE_TO(ScriptDataEscapeStartDashState); 495 495 } else 496 496 RECONSUME_IN(ScriptDataState); … … 500 500 if (character == '-') { 501 501 bufferASCIICharacter('-'); 502 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);502 ADVANCE_TO(ScriptDataEscapedDashDashState); 503 503 } else 504 504 RECONSUME_IN(ScriptDataState); … … 508 508 if (character == '-') { 509 509 bufferASCIICharacter('-'); 510 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEscapedDashState);510 ADVANCE_TO(ScriptDataEscapedDashState); 511 511 } 512 512 if (character == '<') 513 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);513 ADVANCE_TO(ScriptDataEscapedLessThanSignState); 514 514 if (character == kEndOfFileMarker) { 515 515 parseError(); … … 523 523 if (character == '-') { 524 524 bufferASCIICharacter('-'); 525 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);525 ADVANCE_TO(ScriptDataEscapedDashDashState); 526 526 } 527 527 if (character == '<') 528 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);528 ADVANCE_TO(ScriptDataEscapedLessThanSignState); 529 529 if (character == kEndOfFileMarker) { 530 530 parseError(); … … 538 538 if (character == '-') { 539 539 bufferASCIICharacter('-'); 540 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);540 
ADVANCE_TO(ScriptDataEscapedDashDashState); 541 541 } 542 542 if (character == '<') 543 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);543 ADVANCE_TO(ScriptDataEscapedLessThanSignState); 544 544 if (character == '>') { 545 545 bufferASCIICharacter('>'); 546 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataState);546 ADVANCE_TO(ScriptDataState); 547 547 } 548 548 if (character == kEndOfFileMarker) { … … 558 558 m_temporaryBuffer.clear(); 559 559 ASSERT(m_bufferedEndTagName.isEmpty()); 560 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagOpenState);560 ADVANCE_TO(ScriptDataEscapedEndTagOpenState); 561 561 } 562 562 if (isASCIIAlpha(character)) { … … 565 565 m_temporaryBuffer.clear(); 566 566 appendToTemporaryBuffer(convertASCIIAlphaToLower(character)); 567 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeStartState);567 ADVANCE_TO(ScriptDataDoubleEscapeStartState); 568 568 } 569 569 bufferASCIICharacter('<'); … … 575 575 appendToTemporaryBuffer(character); 576 576 appendToPossibleEndTag(convertASCIIAlphaToLower(character)); 577 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagNameState);577 ADVANCE_TO(ScriptDataEscapedEndTagNameState); 578 578 } 579 579 bufferASCIICharacter('<'); … … 586 586 appendToTemporaryBuffer(character); 587 587 appendToPossibleEndTag(convertASCIIAlphaToLower(character)); 588 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagNameState);588 ADVANCE_TO(ScriptDataEscapedEndTagNameState); 589 589 } 590 590 if (isTokenizerWhitespace(character)) { … … 623 623 bufferASCIICharacter(character); 624 624 appendToTemporaryBuffer(convertASCIIAlphaToLower(character)); 625 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeStartState);625 ADVANCE_TO(ScriptDataDoubleEscapeStartState); 626 626 } 627 627 RECONSUME_IN(ScriptDataEscapedState); … … 631 631 if (character == '-') { 632 632 bufferASCIICharacter('-'); 633 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashState);633 ADVANCE_TO(ScriptDataDoubleEscapedDashState); 634 634 } 635 635 
if (character == '<') { 636 636 bufferASCIICharacter('<'); 637 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);637 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); 638 638 } 639 639 if (character == kEndOfFileMarker) { … … 648 648 if (character == '-') { 649 649 bufferASCIICharacter('-'); 650 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashDashState);650 ADVANCE_TO(ScriptDataDoubleEscapedDashDashState); 651 651 } 652 652 if (character == '<') { 653 653 bufferASCIICharacter('<'); 654 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);654 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); 655 655 } 656 656 if (character == kEndOfFileMarker) { … … 665 665 if (character == '-') { 666 666 bufferASCIICharacter('-'); 667 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashDashState);667 ADVANCE_TO(ScriptDataDoubleEscapedDashDashState); 668 668 } 669 669 if (character == '<') { 670 670 bufferASCIICharacter('<'); 671 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);671 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); 672 672 } 673 673 if (character == '>') { 674 674 bufferASCIICharacter('>'); 675 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataState);675 ADVANCE_TO(ScriptDataState); 676 676 } 677 677 if (character == kEndOfFileMarker) { … … 687 687 bufferASCIICharacter('/'); 688 688 m_temporaryBuffer.clear(); 689 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeEndState);689 ADVANCE_TO(ScriptDataDoubleEscapeEndState); 690 690 } 691 691 RECONSUME_IN(ScriptDataDoubleEscapedState); … … 703 703 bufferASCIICharacter(character); 704 704 appendToTemporaryBuffer(convertASCIIAlphaToLower(character)); 705 ADVANCE_ PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeEndState);705 ADVANCE_TO(ScriptDataDoubleEscapeEndState); 706 706 } 707 707 RECONSUME_IN(ScriptDataDoubleEscapedState); … … 712 712 ADVANCE_TO(BeforeAttributeNameState); 713 713 if (character == '/') 714 ADVANCE_ 
PAST_NON_NEWLINE_TO(SelfClosingStartTagState);714 ADVANCE_TO(SelfClosingStartTagState); 715 715 if (character == '>') 716 716 return emitAndResumeInDataState(source); … … 725 725 m_token.beginAttribute(source.numberOfCharactersConsumed()); 726 726 m_token.appendToAttributeName(toASCIILower(character)); 727 ADVANCE_ PAST_NON_NEWLINE_TO(AttributeNameState);727 ADVANCE_TO(AttributeNameState); 728 728 END_STATE() 729 729 … … 732 732 ADVANCE_TO(AfterAttributeNameState); 733 733 if (character == '/') 734 ADVANCE_ PAST_NON_NEWLINE_TO(SelfClosingStartTagState);734 ADVANCE_TO(SelfClosingStartTagState); 735 735 if (character == '=') 736 ADVANCE_ PAST_NON_NEWLINE_TO(BeforeAttributeValueState);736 ADVANCE_TO(BeforeAttributeValueState); 737 737 if (character == '>') 738 738 return emitAndResumeInDataState(source); … … 746 746 parseError(); 747 747 m_token.appendToAttributeName(toASCIILower(character)); 748 ADVANCE_ PAST_NON_NEWLINE_TO(AttributeNameState);748 ADVANCE_TO(AttributeNameState); 749 749 END_STATE() 750 750 … … 753 753 ADVANCE_TO(AfterAttributeNameState); 754 754 if (character == '/') 755 ADVANCE_ PAST_NON_NEWLINE_TO(SelfClosingStartTagState);755 ADVANCE_TO(SelfClosingStartTagState); 756 756 if (character == '=') 757 ADVANCE_ PAST_NON_NEWLINE_TO(BeforeAttributeValueState);757 ADVANCE_TO(BeforeAttributeValueState); 758 758 if (character == '>') 759 759 return emitAndResumeInDataState(source); … … 768 768 m_token.beginAttribute(source.numberOfCharactersConsumed()); 769 769 m_token.appendToAttributeName(toASCIILower(character)); 770 ADVANCE_ PAST_NON_NEWLINE_TO(AttributeNameState);770 ADVANCE_TO(AttributeNameState); 771 771 END_STATE() 772 772 … … 775 775 ADVANCE_TO(BeforeAttributeValueState); 776 776 if (character == '"') 777 ADVANCE_ PAST_NON_NEWLINE_TO(AttributeValueDoubleQuotedState);777 ADVANCE_TO(AttributeValueDoubleQuotedState); 778 778 if (character == '&') 779 779 RECONSUME_IN(AttributeValueUnquotedState); 780 780 if (character == '\'') 781 ADVANCE_ 
PAST_NON_NEWLINE_TO(AttributeValueSingleQuotedState);781 ADVANCE_TO(AttributeValueSingleQuotedState); 782 782 if (character == '>') { 783 783 parseError(); … … 791 791 parseError(); 792 792 m_token.appendToAttributeValue(character); 793 ADVANCE_ PAST_NON_NEWLINE_TO(AttributeValueUnquotedState);793 ADVANCE_TO(AttributeValueUnquotedState); 794 794 END_STATE() 795 795 … … 797 797 if (character == '"') { 798 798 m_token.endAttribute(source.numberOfCharactersConsumed()); 799 ADVANCE_ PAST_NON_NEWLINE_TO(AfterAttributeValueQuotedState);799 ADVANCE_TO(AfterAttributeValueQuotedState); 800 800 } 801 801 if (character == '&') { 802 802 m_additionalAllowedCharacter = '"'; 803 ADVANCE_ PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);803 ADVANCE_TO(CharacterReferenceInAttributeValueState); 804 804 } 805 805 if (character == kEndOfFileMarker) { … … 815 815 if (character == '\'') { 816 816 m_token.endAttribute(source.numberOfCharactersConsumed()); 817 ADVANCE_ PAST_NON_NEWLINE_TO(AfterAttributeValueQuotedState);817 ADVANCE_TO(AfterAttributeValueQuotedState); 818 818 } 819 819 if (character == '&') { 820 820 m_additionalAllowedCharacter = '\''; 821 ADVANCE_ PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);821 ADVANCE_TO(CharacterReferenceInAttributeValueState); 822 822 } 823 823 if (character == kEndOfFileMarker) { … … 837 837 if (character == '&') { 838 838 m_additionalAllowedCharacter = '>'; 839 ADVANCE_ PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);839 ADVANCE_TO(CharacterReferenceInAttributeValueState); 840 840 } 841 841 if (character == '>') { … … 851 851 parseError(); 852 852 m_token.appendToAttributeValue(character); 853 ADVANCE_ PAST_NON_NEWLINE_TO(AttributeValueUnquotedState);853 ADVANCE_TO(AttributeValueUnquotedState); 854 854 END_STATE() 855 855 … … 883 883 ADVANCE_TO(BeforeAttributeNameState); 884 884 if (character == '/') 885 ADVANCE_ PAST_NON_NEWLINE_TO(SelfClosingStartTagState);885 ADVANCE_TO(SelfClosingStartTagState); 886 886 
if (character == '>') 887 887 return emitAndResumeInDataState(source); … … 933 933 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken()); 934 934 } else if (isASCIIAlphaCaselessEqual(character, 'd')) { 935 auto result = source.advancePast LettersIgnoringASCIICase("doctype");935 auto result = source.advancePastIgnoringCase("doctype"); 936 936 if (result == SegmentedString::DidMatch) 937 937 SWITCH_TO(DOCTYPEState); … … 951 951 BEGIN_STATE(CommentStartState) 952 952 if (character == '-') 953 ADVANCE_ PAST_NON_NEWLINE_TO(CommentStartDashState);953 ADVANCE_TO(CommentStartDashState); 954 954 if (character == '>') { 955 955 parseError(); … … 966 966 BEGIN_STATE(CommentStartDashState) 967 967 if (character == '-') 968 ADVANCE_ PAST_NON_NEWLINE_TO(CommentEndState);968 ADVANCE_TO(CommentEndState); 969 969 if (character == '>') { 970 970 parseError(); … … 982 982 BEGIN_STATE(CommentState) 983 983 if (character == '-') 984 ADVANCE_ PAST_NON_NEWLINE_TO(CommentEndDashState);984 ADVANCE_TO(CommentEndDashState); 985 985 if (character == kEndOfFileMarker) { 986 986 parseError(); … … 993 993 BEGIN_STATE(CommentEndDashState) 994 994 if (character == '-') 995 ADVANCE_ PAST_NON_NEWLINE_TO(CommentEndState);995 ADVANCE_TO(CommentEndState); 996 996 if (character == kEndOfFileMarker) { 997 997 parseError(); … … 1008 1008 if (character == '!') { 1009 1009 parseError(); 1010 ADVANCE_ PAST_NON_NEWLINE_TO(CommentEndBangState);1010 ADVANCE_TO(CommentEndBangState); 1011 1011 } 1012 1012 if (character == '-') { 1013 1013 parseError(); 1014 1014 m_token.appendToComment('-'); 1015 ADVANCE_ PAST_NON_NEWLINE_TO(CommentEndState);1015 ADVANCE_TO(CommentEndState); 1016 1016 } 1017 1017 if (character == kEndOfFileMarker) { … … 1031 1031 m_token.appendToComment('-'); 1032 1032 m_token.appendToComment('!'); 1033 ADVANCE_ PAST_NON_NEWLINE_TO(CommentEndDashState);1033 ADVANCE_TO(CommentEndDashState); 1034 1034 } 1035 1035 if (character == '>') … … 1075 1075 } 1076 1076 
m_token.beginDOCTYPE(toASCIILower(character)); 1077 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPENameState);1077 ADVANCE_TO(DOCTYPENameState); 1078 1078 END_STATE() 1079 1079 … … 1089 1089 } 1090 1090 m_token.appendToName(toASCIILower(character)); 1091 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPENameState);1091 ADVANCE_TO(DOCTYPENameState); 1092 1092 END_STATE() 1093 1093 … … 1103 1103 } 1104 1104 if (isASCIIAlphaCaselessEqual(character, 'p')) { 1105 auto result = source.advancePast LettersIgnoringASCIICase("public");1105 auto result = source.advancePastIgnoringCase("public"); 1106 1106 if (result == SegmentedString::DidMatch) 1107 1107 SWITCH_TO(AfterDOCTYPEPublicKeywordState); … … 1109 1109 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken()); 1110 1110 } else if (isASCIIAlphaCaselessEqual(character, 's')) { 1111 auto result = source.advancePast LettersIgnoringASCIICase("system");1111 auto result = source.advancePastIgnoringCase("system"); 1112 1112 if (result == SegmentedString::DidMatch) 1113 1113 SWITCH_TO(AfterDOCTYPESystemKeywordState); … … 1117 1117 parseError(); 1118 1118 m_token.setForceQuirks(); 1119 ADVANCE_ PAST_NON_NEWLINE_TO(BogusDOCTYPEState);1119 ADVANCE_TO(BogusDOCTYPEState); 1120 1120 END_STATE() 1121 1121 … … 1126 1126 parseError(); 1127 1127 m_token.setPublicIdentifierToEmptyString(); 1128 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);1128 ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); 1129 1129 } 1130 1130 if (character == '\'') { 1131 1131 parseError(); 1132 1132 m_token.setPublicIdentifierToEmptyString(); 1133 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierSingleQuotedState);1133 ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); 1134 1134 } 1135 1135 if (character == '>') { … … 1145 1145 parseError(); 1146 1146 m_token.setForceQuirks(); 1147 ADVANCE_ PAST_NON_NEWLINE_TO(BogusDOCTYPEState);1147 ADVANCE_TO(BogusDOCTYPEState); 1148 1148 END_STATE() 1149 1149 … … 1153 1153 if (character == '"') { 1154 1154 
m_token.setPublicIdentifierToEmptyString(); 1155 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);1155 ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); 1156 1156 } 1157 1157 if (character == '\'') { 1158 1158 m_token.setPublicIdentifierToEmptyString(); 1159 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierSingleQuotedState);1159 ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); 1160 1160 } 1161 1161 if (character == '>') { … … 1171 1171 parseError(); 1172 1172 m_token.setForceQuirks(); 1173 ADVANCE_ PAST_NON_NEWLINE_TO(BogusDOCTYPEState);1173 ADVANCE_TO(BogusDOCTYPEState); 1174 1174 END_STATE() 1175 1175 1176 1176 BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) 1177 1177 if (character == '"') 1178 ADVANCE_ PAST_NON_NEWLINE_TO(AfterDOCTYPEPublicIdentifierState);1178 ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); 1179 1179 if (character == '>') { 1180 1180 parseError(); … … 1193 1193 BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) 1194 1194 if (character == '\'') 1195 ADVANCE_ PAST_NON_NEWLINE_TO(AfterDOCTYPEPublicIdentifierState);1195 ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); 1196 1196 if (character == '>') { 1197 1197 parseError(); … … 1216 1216 parseError(); 1217 1217 m_token.setSystemIdentifierToEmptyString(); 1218 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);1218 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); 1219 1219 } 1220 1220 if (character == '\'') { 1221 1221 parseError(); 1222 1222 m_token.setSystemIdentifierToEmptyString(); 1223 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);1223 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1224 1224 } 1225 1225 if (character == kEndOfFileMarker) { … … 1230 1230 parseError(); 1231 1231 m_token.setForceQuirks(); 1232 ADVANCE_ PAST_NON_NEWLINE_TO(BogusDOCTYPEState);1232 ADVANCE_TO(BogusDOCTYPEState); 1233 1233 END_STATE() 1234 1234 … … 1240 1240 if (character == '"') { 1241 1241 
m_token.setSystemIdentifierToEmptyString(); 1242 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);1242 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); 1243 1243 } 1244 1244 if (character == '\'') { 1245 1245 m_token.setSystemIdentifierToEmptyString(); 1246 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);1246 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1247 1247 } 1248 1248 if (character == kEndOfFileMarker) { … … 1253 1253 parseError(); 1254 1254 m_token.setForceQuirks(); 1255 ADVANCE_ PAST_NON_NEWLINE_TO(BogusDOCTYPEState);1255 ADVANCE_TO(BogusDOCTYPEState); 1256 1256 END_STATE() 1257 1257 … … 1262 1262 parseError(); 1263 1263 m_token.setSystemIdentifierToEmptyString(); 1264 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);1264 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); 1265 1265 } 1266 1266 if (character == '\'') { 1267 1267 parseError(); 1268 1268 m_token.setSystemIdentifierToEmptyString(); 1269 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);1269 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1270 1270 } 1271 1271 if (character == '>') { … … 1281 1281 parseError(); 1282 1282 m_token.setForceQuirks(); 1283 ADVANCE_ PAST_NON_NEWLINE_TO(BogusDOCTYPEState);1283 ADVANCE_TO(BogusDOCTYPEState); 1284 1284 END_STATE() 1285 1285 … … 1289 1289 if (character == '"') { 1290 1290 m_token.setSystemIdentifierToEmptyString(); 1291 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);1291 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); 1292 1292 } 1293 1293 if (character == '\'') { 1294 1294 m_token.setSystemIdentifierToEmptyString(); 1295 ADVANCE_ PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);1295 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); 1296 1296 } 1297 1297 if (character == '>') { … … 1307 1307 parseError(); 1308 1308 m_token.setForceQuirks(); 1309 ADVANCE_ PAST_NON_NEWLINE_TO(BogusDOCTYPEState);1309 
ADVANCE_TO(BogusDOCTYPEState); 1310 1310 END_STATE() 1311 1311 1312 1312 BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) 1313 1313 if (character == '"') 1314 ADVANCE_ PAST_NON_NEWLINE_TO(AfterDOCTYPESystemIdentifierState);1314 ADVANCE_TO(AfterDOCTYPESystemIdentifierState); 1315 1315 if (character == '>') { 1316 1316 parseError(); … … 1329 1329 BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) 1330 1330 if (character == '\'') 1331 ADVANCE_ PAST_NON_NEWLINE_TO(AfterDOCTYPESystemIdentifierState);1331 ADVANCE_TO(AfterDOCTYPESystemIdentifierState); 1332 1332 if (character == '>') { 1333 1333 parseError(); … … 1355 1355 } 1356 1356 parseError(); 1357 ADVANCE_ PAST_NON_NEWLINE_TO(BogusDOCTYPEState);1357 ADVANCE_TO(BogusDOCTYPEState); 1358 1358 END_STATE() 1359 1359 … … 1368 1368 BEGIN_STATE(CDATASectionState) 1369 1369 if (character == ']') 1370 ADVANCE_ PAST_NON_NEWLINE_TO(CDATASectionRightSquareBracketState);1370 ADVANCE_TO(CDATASectionRightSquareBracketState); 1371 1371 if (character == kEndOfFileMarker) 1372 1372 RECONSUME_IN(DataState); … … 1377 1377 BEGIN_STATE(CDATASectionRightSquareBracketState) 1378 1378 if (character == ']') 1379 ADVANCE_ PAST_NON_NEWLINE_TO(CDATASectionDoubleRightSquareBracketState);1379 ADVANCE_TO(CDATASectionDoubleRightSquareBracketState); 1380 1380 bufferASCIICharacter(']'); 1381 1381 RECONSUME_IN(CDATASectionState); … … 1384 1384 BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) 1385 1385 if (character == '>') 1386 ADVANCE_ PAST_NON_NEWLINE_TO(DataState);1386 ADVANCE_TO(DataState); 1387 1387 bufferASCIICharacter(']'); 1388 1388 bufferASCIICharacter(']'); -
trunk/Source/WebCore/html/parser/InputStreamPreprocessor.h
r209058 r209120 29 29 30 30 #include "SegmentedString.h" 31 #include <wtf/Noncopyable.h> 31 32 #include <wtf/unicode/CharacterNames.h> 32 33 33 34 namespace WebCore { 34 35 36 const LChar kEndOfFileMarker = 0; 37 35 38 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream 36 39 template <typename Tokenizer> 37 40 class InputStreamPreprocessor { 41 WTF_MAKE_NONCOPYABLE(InputStreamPreprocessor); 38 42 public: 39 43 explicit InputStreamPreprocessor(Tokenizer& tokenizer) 40 44 : m_tokenizer(tokenizer) 41 45 { 46 reset(); 42 47 } 43 48 … … 49 54 ALWAYS_INLINE bool peek(SegmentedString& source, bool skipNullCharacters = false) 50 55 { 51 if ( UNLIKELY(source.isEmpty()))56 if (source.isEmpty()) 52 57 return false; 53 58 54 m_nextInputCharacter = source.currentChar acter();59 m_nextInputCharacter = source.currentChar(); 55 60 56 61 // Every branch in this function is expensive, so we have a … … 58 63 // handling. Please run the parser benchmark whenever you touch 59 64 // this function. It's very hot. 
60 constexprUChar specialCharacterMask = '\n' | '\r' | '\0';61 if ( LIKELY(m_nextInputCharacter & ~specialCharacterMask)) {65 static const UChar specialCharacterMask = '\n' | '\r' | '\0'; 66 if (m_nextInputCharacter & ~specialCharacterMask) { 62 67 m_skipNextNewLine = false; 63 68 return true; 64 69 } 65 66 70 return processNextInputCharacter(source, skipNullCharacters); 67 71 } … … 70 74 ALWAYS_INLINE bool advance(SegmentedString& source, bool skipNullCharacters = false) 71 75 { 72 source.advance ();76 source.advanceAndUpdateLineNumber(); 73 77 return peek(source, skipNullCharacters); 74 78 } 75 ALWAYS_INLINE bool advancePastNonNewline(SegmentedString& source, bool skipNullCharacters = false) 79 80 bool skipNextNewLine() const { return m_skipNextNewLine; } 81 82 void reset(bool skipNextNewLine = false) 76 83 { 77 source.advancePastNonNewline();78 return peek(source, skipNullCharacters);84 m_nextInputCharacter = '\0'; 85 m_skipNextNewLine = skipNextNewLine; 79 86 } 80 87 … … 83 90 { 84 91 ProcessAgain: 85 ASSERT(m_nextInputCharacter == source.currentCharacter()); 92 ASSERT(m_nextInputCharacter == source.currentChar()); 93 86 94 if (m_nextInputCharacter == '\n' && m_skipNextNewLine) { 87 95 m_skipNextNewLine = false; 88 source.advancePastNewline ();96 source.advancePastNewlineAndUpdateLineNumber(); 89 97 if (source.isEmpty()) 90 98 return false; 91 m_nextInputCharacter = source.currentChar acter();99 m_nextInputCharacter = source.currentChar(); 92 100 } 93 101 if (m_nextInputCharacter == '\r') { 94 102 m_nextInputCharacter = '\n'; 95 103 m_skipNextNewLine = true; 96 return true; 104 } else { 105 m_skipNextNewLine = false; 106 // FIXME: The spec indicates that the surrogate pair range as well as 107 // a number of specific character values are parse errors and should be replaced 108 // by the replacement character. We suspect this is a problem with the spec as doing 109 // that filtering breaks surrogate pair handling and causes us not to match Minefield. 
110 if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) { 111 if (skipNullCharacters && !m_tokenizer.neverSkipNullCharacters()) { 112 source.advancePastNonNewline(); 113 if (source.isEmpty()) 114 return false; 115 m_nextInputCharacter = source.currentChar(); 116 goto ProcessAgain; 117 } 118 m_nextInputCharacter = replacementCharacter; 119 } 97 120 } 98 m_skipNextNewLine = false;99 if (m_nextInputCharacter || isAtEndOfFile(source))100 return true;101 if (skipNullCharacters && !m_tokenizer.neverSkipNullCharacters()) {102 source.advancePastNonNewline();103 if (source.isEmpty())104 return false;105 m_nextInputCharacter = source.currentCharacter();106 goto ProcessAgain;107 }108 m_nextInputCharacter = replacementCharacter;109 121 return true; 110 122 } 111 123 112 static bool isAtEndOfFile(SegmentedString& source)124 bool shouldTreatNullAsEndOfFileMarker(SegmentedString& source) const 113 125 { 114 126 return source.isClosed() && source.length() == 1; … … 118 130 119 131 // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character 120 UChar m_nextInputCharacter { 0 };121 bool m_skipNextNewLine { false };132 UChar m_nextInputCharacter; 133 bool m_skipNextNewLine; 122 134 }; 123 135 -
trunk/Source/WebCore/html/track/BufferedLineReader.cpp
r209058 r209120 36 36 namespace WebCore { 37 37 38 std::optional<String> BufferedLineReader::nextLine()38 bool BufferedLineReader::getLine(String& line) 39 39 { 40 40 if (m_maybeSkipLF) { … … 43 43 // then skip it, and then (unconditionally) return the buffered line. 44 44 if (!m_buffer.isEmpty()) { 45 if (m_buffer.currentCharacter() == newlineCharacter) 46 m_buffer.advancePastNewline(); 45 scanCharacter(newlineCharacter); 47 46 m_maybeSkipLF = false; 48 47 } 49 48 // If there was no (new) data available, then keep m_maybeSkipLF set, 50 // and fall through all the way down to the EOS check at the end of the function. 49 // and fall through all the way down to the EOS check at the end of 50 // the method. 51 51 } 52 52 … … 54 54 bool checkForLF = false; 55 55 while (!m_buffer.isEmpty()) { 56 UChar c haracter = m_buffer.currentCharacter();56 UChar c = m_buffer.currentChar(); 57 57 m_buffer.advance(); 58 58 59 if (c haracter == newlineCharacter || character== carriageReturn) {59 if (c == newlineCharacter || c == carriageReturn) { 60 60 // We found a line ending. Return the accumulated line. 61 61 shouldReturnLine = true; 62 checkForLF = (c haracter== carriageReturn);62 checkForLF = (c == carriageReturn); 63 63 break; 64 64 } … … 66 66 // NULs are transformed into U+FFFD (REPLACEMENT CHAR.) in step 1 of 67 67 // the WebVTT parser algorithm. 68 if (c haracter== '\0')69 c haracter= replacementCharacter;68 if (c == '\0') 69 c = replacementCharacter; 70 70 71 m_lineBuffer.append(c haracter);71 m_lineBuffer.append(c); 72 72 } 73 73 … … 75 75 // May be in the middle of a CRLF pair. 76 76 if (!m_buffer.isEmpty()) { 77 if (m_buffer.currentCharacter() == newlineCharacter)78 m_buffer.advancePastNewline();77 // Scan a potential newline character. 
78 scanCharacter(newlineCharacter); 79 79 } else { 80 // Check for the newlineon the next call (unless we reached EOS, in80 // Check for the LF on the next call (unless we reached EOS, in 81 81 // which case we'll return the contents of the line buffer, and 82 82 // reset state for the next line.) … … 93 93 94 94 if (shouldReturnLine) { 95 autoline = m_lineBuffer.toString();95 line = m_lineBuffer.toString(); 96 96 m_lineBuffer.clear(); 97 return WTFMove(line);97 return true; 98 98 } 99 99 100 100 ASSERT(m_buffer.isEmpty()); 101 return std::nullopt;101 return false; 102 102 } 103 103 -
trunk/Source/WebCore/html/track/BufferedLineReader.h
r209058 r209120 39 39 // 40 40 // Converts a stream of data (== a sequence of Strings) into a set of 41 // lines. CR, LR or CRLF are considered line 42 // to 'REPLACEMENT CHARACTER' (U+FFFD) and does not return the line 41 // lines. CR, LR or CRLF are considered linebreaks. Normalizes NULs (U+0000) 42 // to 'REPLACEMENT CHARACTER' (U+FFFD) and does not return the linebreaks as 43 43 // part of the result. 44 44 class BufferedLineReader { 45 45 WTF_MAKE_NONCOPYABLE(BufferedLineReader); 46 46 public: 47 BufferedLineReader() = default; 48 void reset(); 47 BufferedLineReader() 48 : m_endOfStream(false) 49 , m_maybeSkipLF(false) { } 49 50 50 void append(String&& data) 51 // Append data to the internal buffer. 52 void append(const String& data) 51 53 { 52 54 ASSERT(!m_endOfStream); 53 m_buffer.append( WTFMove(data));55 m_buffer.append(SegmentedString(data)); 54 56 } 55 57 56 void appendEndOfStream() { m_endOfStream = true; } 58 // Indicate that no more data will be appended. This will cause any 59 // potentially "unterminated" line to be returned from getLine. 60 void setEndOfStream() { m_endOfStream = true; } 61 62 // Attempt to read a line from the internal buffer (fed via append). 63 // If successful, true is returned and |line| is set to the line that was 64 // read. If no line could be read false is returned. 65 bool getLine(String& line); 66 67 // Returns true if EOS has been reached proper. 57 68 bool isAtEndOfStream() const { return m_endOfStream && m_buffer.isEmpty(); } 58 69 59 std::optional<String> nextLine();70 void reset() { m_buffer.clear(); } 60 71 61 72 private: 73 // Consume the next character the buffer if it is the character |c|. 
74 void scanCharacter(UChar c) 75 { 76 ASSERT(!m_buffer.isEmpty()); 77 if (m_buffer.currentChar() == c) 78 m_buffer.advance(); 79 } 80 62 81 SegmentedString m_buffer; 63 82 StringBuilder m_lineBuffer; 64 bool m_endOfStream { false };65 bool m_maybeSkipLF { false };83 bool m_endOfStream; 84 bool m_maybeSkipLF; 66 85 }; 67 86 68 inline void BufferedLineReader::reset()69 {70 m_buffer.clear();71 m_lineBuffer.clear();72 m_endOfStream = false;73 m_maybeSkipLF = false;74 }75 76 87 } // namespace WebCore -
trunk/Source/WebCore/html/track/InbandGenericTextTrack.cpp
r209058 r209120 186 186 } 187 187 188 void InbandGenericTextTrack::parseWebVTTFileHeader(InbandTextTrackPrivate* trackPrivate, String &&header)188 void InbandGenericTextTrack::parseWebVTTFileHeader(InbandTextTrackPrivate* trackPrivate, String header) 189 189 { 190 190 ASSERT_UNUSED(trackPrivate, trackPrivate == m_private); 191 parser().parseFileHeader( WTFMove(header));191 parser().parseFileHeader(header); 192 192 } 193 193 -
trunk/Source/WebCore/html/track/InbandGenericTextTrack.h
r209058 r209120 73 73 WebVTTParser& parser(); 74 74 void parseWebVTTCueData(InbandTextTrackPrivate*, const ISOWebVTTCue&) final; 75 void parseWebVTTFileHeader(InbandTextTrackPrivate*, String &&) final;75 void parseWebVTTFileHeader(InbandTextTrackPrivate*, String) final; 76 76 77 77 void newCuesParsed() final; -
trunk/Source/WebCore/html/track/InbandTextTrack.h
r209058 r209120 80 80 void removeGenericCue(InbandTextTrackPrivate*, GenericCueData*) override { ASSERT_NOT_REACHED(); } 81 81 82 void parseWebVTTFileHeader(InbandTextTrackPrivate*, String &&) override { ASSERT_NOT_REACHED(); }82 void parseWebVTTFileHeader(InbandTextTrackPrivate*, String) override { ASSERT_NOT_REACHED(); } 83 83 void parseWebVTTCueData(InbandTextTrackPrivate*, const char*, unsigned) override { ASSERT_NOT_REACHED(); } 84 84 void parseWebVTTCueData(InbandTextTrackPrivate*, const ISOWebVTTCue&) override { ASSERT_NOT_REACHED(); } -
trunk/Source/WebCore/html/track/WebVTTParser.cpp
r209058 r209120 105 105 } 106 106 107 void WebVTTParser::parseFileHeader( String&& data)107 void WebVTTParser::parseFileHeader(const String& data) 108 108 { 109 109 m_state = Initial; 110 110 m_lineReader.reset(); 111 m_lineReader.append( WTFMove(data));111 m_lineReader.append(data); 112 112 parse(); 113 113 } … … 115 115 void WebVTTParser::parseBytes(const char* data, unsigned length) 116 116 { 117 m_lineReader.append(m_decoder->decode(data, length)); 117 String textData = m_decoder->decode(data, length); 118 m_lineReader.append(textData); 118 119 parse(); 119 120 } … … 121 122 void WebVTTParser::parseCueData(const ISOWebVTTCue& data) 122 123 { 123 autocue = WebVTTCueData::create();124 RefPtr<WebVTTCueData> cue = WebVTTCueData::create(); 124 125 125 126 MediaTime startTime = data.presentationTime(); … … 135 136 cue->setOriginalStartTime(originalStartTime); 136 137 137 m_cuelist.append( WTFMove(cue));138 m_cuelist.append(cue); 138 139 if (m_client) 139 140 m_client->newCuesParsed(); … … 142 143 void WebVTTParser::flush() 143 144 { 144 m_lineReader.append(m_decoder->flush()); 145 m_lineReader.appendEndOfStream(); 145 String textData = m_decoder->flush(); 146 m_lineReader.append(textData); 147 m_lineReader.setEndOfStream(); 146 148 parse(); 147 149 flushPendingCue(); … … 152 154 // WebVTT parser algorithm. (5.1 WebVTT file parsing.) 153 155 // Steps 1 - 3 - Initial setup. 154 while (auto line = m_lineReader.nextLine()) { 156 String line; 157 while (m_lineReader.getLine(line)) { 158 if (line.isNull()) 159 return; 160 155 161 switch (m_state) { 156 162 case Initial: 157 163 // Steps 4 - 9 - Check for a valid WebVTT signature. 
158 if (!hasRequiredFileIdentifier( *line)) {164 if (!hasRequiredFileIdentifier(line)) { 159 165 if (m_client) 160 166 m_client->fileFailedToParse(); … … 166 172 167 173 case Header: 168 collectMetadataHeader( *line);169 170 if (line ->isEmpty()) {174 collectMetadataHeader(line); 175 176 if (line.isEmpty()) { 171 177 // Steps 10-14 - Allow a header (comment area) under the WEBVTT line. 172 178 if (m_client && m_regionList.size()) … … 176 182 } 177 183 // Step 15 - Break out of header loop if the line could be a timestamp line. 178 if (line ->contains("-->"))179 m_state = recoverCue( *line);184 if (line.contains("-->")) 185 m_state = recoverCue(line); 180 186 181 187 // Step 16 - Line is not the empty string and does not contain "-->". … … 184 190 case Id: 185 191 // Steps 17 - 20 - Allow any number of line terminators, then initialize new cue values. 186 if (line ->isEmpty())192 if (line.isEmpty()) 187 193 break; 188 194 … … 191 197 192 198 // Steps 22 - 25 - Check if this line contains an optional identifier or timing data. 193 m_state = collectCueId( *line);199 m_state = collectCueId(line); 194 200 break; 195 201 196 202 case TimingsAndSettings: 197 203 // Steps 26 - 27 - Discard current cue if the line is empty. 198 if (line ->isEmpty()) {204 if (line.isEmpty()) { 199 205 m_state = Id; 200 206 break; … … 202 208 203 209 // Steps 28 - 29 - Collect cue timings and settings. 204 m_state = collectTimingsAndSettings( *line);210 m_state = collectTimingsAndSettings(line); 205 211 break; 206 212 207 213 case CueText: 208 214 // Steps 31 - 41 - Collect the cue text, create a cue, and add it to the output. 209 m_state = collectCueText( *line);215 m_state = collectCueText(line); 210 216 break; 211 217 212 218 case BadCue: 213 219 // Steps 42 - 48 - Discard lines until an empty line or a potential timing line is seen. 214 m_state = ignoreBadCue( *line);220 m_state = ignoreBadCue(line); 215 221 break; 216 222 -
trunk/Source/WebCore/html/track/WebVTTParser.h
r209058 r209120 134 134 // Input data to the parser to parse. 135 135 void parseBytes(const char*, unsigned); 136 void parseFileHeader( String&&);136 void parseFileHeader(const String&); 137 137 void parseCueData(const ISOWebVTTCue&); 138 138 void flush(); -
trunk/Source/WebCore/html/track/WebVTTTokenizer.cpp
r209058 r209120 31 31 32 32 #include "config.h" 33 34 #if ENABLE(VIDEO_TRACK) 35 33 36 #include "WebVTTTokenizer.h" 34 35 #if ENABLE(VIDEO_TRACK)36 37 37 38 #include "MarkupTokenizerInlines.h" … … 48 49 goto stateName; \ 49 50 } while (false) 50 51 51 52 template<unsigned charactersCount> ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)[charactersCount]) 52 53 { … … 69 70 inline bool advanceAndEmitToken(SegmentedString& source, WebVTTToken& resultToken, const WebVTTToken& token) 70 71 { 71 source.advance ();72 source.advanceAndUpdateLineNumber(); 72 73 return emitToken(resultToken, token); 73 74 } … … 79 80 // Append an EOF marker and close the input "stream". 80 81 ASSERT(!m_input.isClosed()); 81 m_input.append(S tring { &kEndOfFileMarker, 1 });82 m_input.append(SegmentedString(String(&kEndOfFileMarker, 1))); 82 83 m_input.close(); 83 84 } -
trunk/Source/WebCore/platform/graphics/InbandTextTrackPrivateClient.h
r209058 r209120 181 181 virtual void removeGenericCue(InbandTextTrackPrivate*, GenericCueData*) = 0; 182 182 183 virtual void parseWebVTTFileHeader(InbandTextTrackPrivate*, String &&) { ASSERT_NOT_REACHED(); }183 virtual void parseWebVTTFileHeader(InbandTextTrackPrivate*, String) { ASSERT_NOT_REACHED(); } 184 184 virtual void parseWebVTTCueData(InbandTextTrackPrivate*, const char* data, unsigned length) = 0; 185 185 virtual void parseWebVTTCueData(InbandTextTrackPrivate*, const ISOWebVTTCue&) = 0; -
trunk/Source/WebCore/platform/text/SegmentedString.cpp
r209058 r209120 1 1 /* 2 Copyright (C) 2004 -2016Apple Inc. All rights reserved.2 Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. 3 3 4 4 This library is free software; you can redistribute it and/or … … 21 21 #include "SegmentedString.h" 22 22 23 #include <wtf/text/StringBuilder.h>24 23 #include <wtf/text/TextPosition.h> 25 24 26 25 namespace WebCore { 27 26 28 inline void SegmentedString::Substring::appendTo(StringBuilder& builder) const 29 { 30 builder.append(string, string.length() - length, length); 31 } 32 33 SegmentedString& SegmentedString::operator=(SegmentedString&& other) 34 { 35 m_currentSubstring = WTFMove(other.m_currentSubstring); 36 m_otherSubstrings = WTFMove(other.m_otherSubstrings); 37 38 m_isClosed = other.m_isClosed; 39 40 m_currentCharacter = other.m_currentCharacter; 41 42 m_numberOfCharactersConsumedPriorToCurrentSubstring = other.m_numberOfCharactersConsumedPriorToCurrentSubstring; 27 SegmentedString::SegmentedString(const SegmentedString& other) 28 : m_pushedChar1(other.m_pushedChar1) 29 , m_pushedChar2(other.m_pushedChar2) 30 , m_currentString(other.m_currentString) 31 , m_numberOfCharactersConsumedPriorToCurrentString(other.m_numberOfCharactersConsumedPriorToCurrentString) 32 , m_numberOfCharactersConsumedPriorToCurrentLine(other.m_numberOfCharactersConsumedPriorToCurrentLine) 33 , m_currentLine(other.m_currentLine) 34 , m_substrings(other.m_substrings) 35 , m_closed(other.m_closed) 36 , m_empty(other.m_empty) 37 , m_fastPathFlags(other.m_fastPathFlags) 38 , m_advanceFunc(other.m_advanceFunc) 39 , m_advanceAndUpdateLineNumberFunc(other.m_advanceAndUpdateLineNumberFunc) 40 { 41 if (m_pushedChar2) 42 m_currentChar = m_pushedChar2; 43 else if (m_pushedChar1) 44 m_currentChar = m_pushedChar1; 45 else 46 m_currentChar = m_currentString.m_length ? 
m_currentString.getCurrentChar() : 0; 47 } 48 49 SegmentedString& SegmentedString::operator=(const SegmentedString& other) 50 { 51 m_pushedChar1 = other.m_pushedChar1; 52 m_pushedChar2 = other.m_pushedChar2; 53 m_currentString = other.m_currentString; 54 m_substrings = other.m_substrings; 55 if (m_pushedChar2) 56 m_currentChar = m_pushedChar2; 57 else if (m_pushedChar1) 58 m_currentChar = m_pushedChar1; 59 else 60 m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0; 61 62 m_closed = other.m_closed; 63 m_empty = other.m_empty; 64 m_fastPathFlags = other.m_fastPathFlags; 65 m_numberOfCharactersConsumedPriorToCurrentString = other.m_numberOfCharactersConsumedPriorToCurrentString; 43 66 m_numberOfCharactersConsumedPriorToCurrentLine = other.m_numberOfCharactersConsumedPriorToCurrentLine; 44 67 m_currentLine = other.m_currentLine; 45 68 46 m_fastPathFlags = other.m_fastPathFlags; 47 m_advanceWithoutUpdatingLineNumberFunction = other.m_advanceWithoutUpdatingLineNumberFunction; 48 m_advanceAndUpdateLineNumberFunction = other.m_advanceAndUpdateLineNumberFunction; 49 50 other.clear(); 69 m_advanceFunc = other.m_advanceFunc; 70 m_advanceAndUpdateLineNumberFunc = other.m_advanceAndUpdateLineNumberFunc; 51 71 52 72 return *this; … … 55 75 unsigned SegmentedString::length() const 56 76 { 57 unsigned length = m_currentSubstring.length; 58 for (auto& substring : m_otherSubstrings) 59 length += substring.length; 77 unsigned length = m_currentString.m_length; 78 if (m_pushedChar1) { 79 ++length; 80 if (m_pushedChar2) 81 ++length; 82 } 83 if (isComposite()) { 84 Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin(); 85 Deque<SegmentedSubstring>::const_iterator e = m_substrings.end(); 86 for (; it != e; ++it) 87 length += it->m_length; 88 } 60 89 return length; 61 90 } … … 63 92 void SegmentedString::setExcludeLineNumbers() 64 93 { 65 if (!m_currentSubstring.doNotExcludeLineNumbers) 66 return; 67 m_currentSubstring.doNotExcludeLineNumbers = 
false; 68 for (auto& substring : m_otherSubstrings) 69 substring.doNotExcludeLineNumbers = false; 70 updateAdvanceFunctionPointers(); 94 m_currentString.setExcludeLineNumbers(); 95 if (isComposite()) { 96 Deque<SegmentedSubstring>::iterator it = m_substrings.begin(); 97 Deque<SegmentedSubstring>::iterator e = m_substrings.end(); 98 for (; it != e; ++it) 99 it->setExcludeLineNumbers(); 100 } 71 101 } 72 102 73 103 void SegmentedString::clear() 74 104 { 75 m_currentSubstring.length = 0; 76 m_otherSubstrings.clear(); 77 78 m_isClosed = false; 79 80 m_currentCharacter = 0; 81 82 m_numberOfCharactersConsumedPriorToCurrentSubstring = 0; 105 m_pushedChar1 = 0; 106 m_pushedChar2 = 0; 107 m_currentChar = 0; 108 m_currentString.clear(); 109 m_numberOfCharactersConsumedPriorToCurrentString = 0; 83 110 m_numberOfCharactersConsumedPriorToCurrentLine = 0; 84 111 m_currentLine = 0; 85 86 updateAdvanceFunctionPointersForEmptyString(); 87 } 88 89 inline void SegmentedString::appendSubstring(Substring&& substring) 90 { 91 ASSERT(!m_isClosed); 92 if (!substring.length) 112 m_substrings.clear(); 113 m_closed = false; 114 m_empty = true; 115 m_fastPathFlags = NoFastPath; 116 m_advanceFunc = &SegmentedString::advanceEmpty; 117 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; 118 } 119 120 void SegmentedString::append(const SegmentedSubstring& s) 121 { 122 ASSERT(!m_closed); 123 if (!s.m_length) 93 124 return; 94 if (m_currentSubstring.length) 95 m_otherSubstrings.append(WTFMove(substring)); 96 else { 97 m_numberOfCharactersConsumedPriorToCurrentSubstring += m_currentSubstring.numberOfCharactersConsumed(); 98 m_currentSubstring = WTFMove(substring); 99 m_currentCharacter = m_currentSubstring.currentCharacter(); 100 updateAdvanceFunctionPointers(); 101 } 102 } 103 104 void SegmentedString::pushBack(String&& string) 105 { 106 // We never create a substring for an empty string. 
107 ASSERT(string.length()); 108 109 // The new substring we will create won't have the doNotExcludeLineNumbers set appropriately. 110 // That was lost when the characters were consumed before pushing them back. But this does 111 // not matter, because clients never use this for newlines. Catch that with this assertion. 112 ASSERT(!string.contains('\n')); 113 114 // The characters in the string must be previously consumed characters from this segmented string. 115 ASSERT(string.length() <= numberOfCharactersConsumed()); 116 117 m_numberOfCharactersConsumedPriorToCurrentSubstring += m_currentSubstring.numberOfCharactersConsumed(); 118 if (m_currentSubstring.length) 119 m_otherSubstrings.prepend(WTFMove(m_currentSubstring)); 120 m_currentSubstring = WTFMove(string); 121 m_numberOfCharactersConsumedPriorToCurrentSubstring -= m_currentSubstring.length; 122 m_currentCharacter = m_currentSubstring.currentCharacter(); 123 updateAdvanceFunctionPointers(); 125 126 if (!m_currentString.m_length) { 127 m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed(); 128 m_currentString = s; 129 updateAdvanceFunctionPointers(); 130 } else 131 m_substrings.append(s); 132 m_empty = false; 133 } 134 135 void SegmentedString::pushBack(const SegmentedSubstring& s) 136 { 137 ASSERT(!m_pushedChar1); 138 ASSERT(!s.numberOfCharactersConsumed()); 139 if (!s.m_length) 140 return; 141 142 // FIXME: We're assuming that the characters were originally consumed by 143 // this SegmentedString. We're also ASSERTing that s is a fresh 144 // SegmentedSubstring. These assumptions are sufficient for our 145 // current use, but we might need to handle the more elaborate 146 // cases in the future. 
147 m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed(); 148 m_numberOfCharactersConsumedPriorToCurrentString -= s.m_length; 149 if (!m_currentString.m_length) { 150 m_currentString = s; 151 updateAdvanceFunctionPointers(); 152 } else { 153 // Shift our m_currentString into our list. 154 m_substrings.prepend(m_currentString); 155 m_currentString = s; 156 updateAdvanceFunctionPointers(); 157 } 158 m_empty = false; 124 159 } 125 160 126 161 void SegmentedString::close() 127 162 { 128 ASSERT(!m_isClosed); 129 m_isClosed = true; 130 } 131 132 void SegmentedString::append(const SegmentedString& string) 133 { 134 appendSubstring(Substring { string.m_currentSubstring }); 135 for (auto& substring : string.m_otherSubstrings) 136 m_otherSubstrings.append(substring); 137 } 138 139 void SegmentedString::append(SegmentedString&& string) 140 { 141 appendSubstring(WTFMove(string.m_currentSubstring)); 142 for (auto& substring : string.m_otherSubstrings) 143 m_otherSubstrings.append(WTFMove(substring)); 144 } 145 146 void SegmentedString::append(String&& string) 147 { 148 appendSubstring(WTFMove(string)); 149 } 150 151 void SegmentedString::append(const String& string) 152 { 153 appendSubstring(String { string }); 163 // Closing a stream twice is likely a coding mistake. 164 ASSERT(!m_closed); 165 m_closed = true; 166 } 167 168 void SegmentedString::append(const SegmentedString& s) 169 { 170 ASSERT(!m_closed); 171 ASSERT(!s.m_pushedChar1); 172 append(s.m_currentString); 173 if (s.isComposite()) { 174 Deque<SegmentedSubstring>::const_iterator it = s.m_substrings.begin(); 175 Deque<SegmentedSubstring>::const_iterator e = s.m_substrings.end(); 176 for (; it != e; ++it) 177 append(*it); 178 } 179 m_currentChar = m_pushedChar1 ? m_pushedChar1 : (m_currentString.m_length ? 
m_currentString.getCurrentChar() : 0); 180 } 181 182 void SegmentedString::pushBack(const SegmentedString& s) 183 { 184 ASSERT(!m_pushedChar1); 185 ASSERT(!s.m_pushedChar1); 186 if (s.isComposite()) { 187 Deque<SegmentedSubstring>::const_reverse_iterator it = s.m_substrings.rbegin(); 188 Deque<SegmentedSubstring>::const_reverse_iterator e = s.m_substrings.rend(); 189 for (; it != e; ++it) 190 pushBack(*it); 191 } 192 pushBack(s.m_currentString); 193 m_currentChar = m_pushedChar1 ? m_pushedChar1 : (m_currentString.m_length ? m_currentString.getCurrentChar() : 0); 194 } 195 196 void SegmentedString::advanceSubstring() 197 { 198 if (isComposite()) { 199 m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed(); 200 m_currentString = m_substrings.takeFirst(); 201 // If we've previously consumed some characters of the non-current 202 // string, we now account for those characters as part of the current 203 // string, not as part of "prior to current string." 
204 m_numberOfCharactersConsumedPriorToCurrentString -= m_currentString.numberOfCharactersConsumed(); 205 updateAdvanceFunctionPointers(); 206 } else { 207 m_currentString.clear(); 208 m_empty = true; 209 m_fastPathFlags = NoFastPath; 210 m_advanceFunc = &SegmentedString::advanceEmpty; 211 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; 212 } 154 213 } 155 214 … … 157 216 { 158 217 StringBuilder result; 159 m_currentSubstring.appendTo(result); 160 for (auto& substring : m_otherSubstrings) 161 substring.appendTo(result); 218 if (m_pushedChar1) { 219 result.append(m_pushedChar1); 220 if (m_pushedChar2) 221 result.append(m_pushedChar2); 222 } 223 m_currentString.appendTo(result); 224 if (isComposite()) { 225 Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin(); 226 Deque<SegmentedSubstring>::const_iterator e = m_substrings.end(); 227 for (; it != e; ++it) 228 it->appendTo(result); 229 } 162 230 return result.toString(); 163 231 } 164 232 165 void SegmentedString::advanceWithoutUpdatingLineNumber16() 166 { 167 m_currentCharacter = *++m_currentSubstring.currentCharacter16; 233 void SegmentedString::advancePastNonNewlines(unsigned count, UChar* consumedCharacters) 234 { 235 ASSERT_WITH_SECURITY_IMPLICATION(count <= length()); 236 for (unsigned i = 0; i < count; ++i) { 237 consumedCharacters[i] = currentChar(); 238 advancePastNonNewline(); 239 } 240 } 241 242 void SegmentedString::advance8() 243 { 244 ASSERT(!m_pushedChar1); 168 245 decrementAndCheckLength(); 246 m_currentChar = m_currentString.incrementAndGetCurrentChar8(); 247 } 248 249 void SegmentedString::advance16() 250 { 251 ASSERT(!m_pushedChar1); 252 decrementAndCheckLength(); 253 m_currentChar = m_currentString.incrementAndGetCurrentChar16(); 254 } 255 256 void SegmentedString::advanceAndUpdateLineNumber8() 257 { 258 ASSERT(!m_pushedChar1); 259 ASSERT(m_currentString.getCurrentChar() == m_currentChar); 260 if (m_currentChar == '\n') { 261 ++m_currentLine; 262 
m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; 263 } 264 decrementAndCheckLength(); 265 m_currentChar = m_currentString.incrementAndGetCurrentChar8(); 169 266 } 170 267 171 268 void SegmentedString::advanceAndUpdateLineNumber16() 172 269 { 173 ASSERT(m_currentSubstring.doNotExcludeLineNumbers); 174 processPossibleNewline(); 175 m_currentCharacter = *++m_currentSubstring.currentCharacter16; 270 ASSERT(!m_pushedChar1); 271 ASSERT(m_currentString.getCurrentChar() == m_currentChar); 272 if (m_currentChar == '\n') { 273 ++m_currentLine; 274 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; 275 } 176 276 decrementAndCheckLength(); 177 } 178 179 inline void SegmentedString::advancePastSingleCharacterSubstringWithoutUpdatingLineNumber() 180 { 181 ASSERT(m_currentSubstring.length == 1); 182 if (m_otherSubstrings.isEmpty()) { 183 m_currentSubstring.length = 0; 184 m_currentCharacter = 0; 185 updateAdvanceFunctionPointersForEmptyString(); 186 return; 187 } 188 m_numberOfCharactersConsumedPriorToCurrentSubstring += m_currentSubstring.numberOfCharactersConsumed(); 189 m_currentSubstring = m_otherSubstrings.takeFirst(); 190 // If we've previously consumed some characters of the non-current string, we now account for those 191 // characters as part of the current string, not as part of "prior to current string." 
192 m_numberOfCharactersConsumedPriorToCurrentSubstring -= m_currentSubstring.numberOfCharactersConsumed(); 193 m_currentCharacter = m_currentSubstring.currentCharacter(); 194 updateAdvanceFunctionPointers(); 195 } 196 197 void SegmentedString::advancePastSingleCharacterSubstring() 198 { 199 ASSERT(m_currentSubstring.length == 1); 200 ASSERT(m_currentSubstring.doNotExcludeLineNumbers); 201 processPossibleNewline(); 202 advancePastSingleCharacterSubstringWithoutUpdatingLineNumber(); 277 m_currentChar = m_currentString.incrementAndGetCurrentChar16(); 278 } 279 280 void SegmentedString::advanceSlowCase() 281 { 282 if (m_pushedChar1) { 283 m_pushedChar1 = m_pushedChar2; 284 m_pushedChar2 = 0; 285 286 if (m_pushedChar1) { 287 m_currentChar = m_pushedChar1; 288 return; 289 } 290 291 updateAdvanceFunctionPointers(); 292 } else if (m_currentString.m_length) { 293 if (--m_currentString.m_length == 0) 294 advanceSubstring(); 295 } else if (!isComposite()) { 296 m_currentString.clear(); 297 m_empty = true; 298 m_fastPathFlags = NoFastPath; 299 m_advanceFunc = &SegmentedString::advanceEmpty; 300 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; 301 } 302 m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0; 303 } 304 305 void SegmentedString::advanceAndUpdateLineNumberSlowCase() 306 { 307 if (m_pushedChar1) { 308 m_pushedChar1 = m_pushedChar2; 309 m_pushedChar2 = 0; 310 311 if (m_pushedChar1) { 312 m_currentChar = m_pushedChar1; 313 return; 314 } 315 316 updateAdvanceFunctionPointers(); 317 } else if (m_currentString.m_length) { 318 if (m_currentString.getCurrentChar() == '\n' && m_currentString.doNotExcludeLineNumbers()) { 319 ++m_currentLine; 320 // Plus 1 because numberOfCharactersConsumed value hasn't incremented yet; it does with m_length decrement below. 
321 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; 322 } 323 if (--m_currentString.m_length == 0) 324 advanceSubstring(); 325 else 326 m_currentString.incrementAndGetCurrentChar(); // Only need the ++ 327 } else if (!isComposite()) { 328 m_currentString.clear(); 329 m_empty = true; 330 m_fastPathFlags = NoFastPath; 331 m_advanceFunc = &SegmentedString::advanceEmpty; 332 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; 333 } 334 335 m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0; 203 336 } 204 337 205 338 void SegmentedString::advanceEmpty() 206 339 { 207 ASSERT(!m_currentSubstring.length); 208 ASSERT(m_otherSubstrings.isEmpty()); 209 ASSERT(!m_currentCharacter); 210 } 211 212 void SegmentedString::updateAdvanceFunctionPointersForSingleCharacterSubstring() 213 { 214 ASSERT(m_currentSubstring.length == 1); 340 ASSERT(!m_currentString.m_length && !isComposite()); 341 m_currentChar = 0; 342 } 343 344 void SegmentedString::updateSlowCaseFunctionPointers() 345 { 215 346 m_fastPathFlags = NoFastPath; 216 m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advancePastSingleCharacterSubstringWithoutUpdatingLineNumber; 217 if (m_currentSubstring.doNotExcludeLineNumbers) 218 m_advanceAndUpdateLineNumberFunction = &SegmentedString::advancePastSingleCharacterSubstring; 219 else 220 m_advanceAndUpdateLineNumberFunction = &SegmentedString::advancePastSingleCharacterSubstringWithoutUpdatingLineNumber; 347 m_advanceFunc = &SegmentedString::advanceSlowCase; 348 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumberSlowCase; 221 349 } 222 350 … … 237 365 } 238 366 239 SegmentedString::AdvancePastResult SegmentedString::advancePastSlowCase(const char* literal, bool lettersIgnoringASCIICase) 240 { 241 constexpr unsigned maxLength = 10; 242 ASSERT(!strchr(literal, '\n')); 243 auto length = strlen(literal); 244 ASSERT(length <= maxLength); 367 
SegmentedString::AdvancePastResult SegmentedString::advancePastSlowCase(const char* literal, bool caseSensitive) 368 { 369 unsigned length = strlen(literal); 245 370 if (length > this->length()) 246 371 return NotEnoughCharacters; 247 UChar consumedCharacters[maxLength]; 248 for (unsigned i = 0; i < length; ++i) { 249 auto character = m_currentCharacter; 250 if (characterMismatch(character, literal[i], lettersIgnoringASCIICase)) { 251 if (i) 252 pushBack(String { consumedCharacters, i }); 253 return DidNotMatch; 254 } 255 advancePastNonNewline(); 256 consumedCharacters[i] = character; 257 } 258 return DidMatch; 259 } 260 261 void SegmentedString::updateAdvanceFunctionPointersForEmptyString() 262 { 263 ASSERT(!m_currentSubstring.length); 264 ASSERT(m_otherSubstrings.isEmpty()); 265 ASSERT(!m_currentCharacter); 266 m_fastPathFlags = NoFastPath; 267 m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advanceEmpty; 268 m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceEmpty; 269 } 270 271 } 372 UChar* consumedCharacters; 373 String consumedString = String::createUninitialized(length, consumedCharacters); 374 advancePastNonNewlines(length, consumedCharacters); 375 if (consumedString.startsWith(literal, caseSensitive)) 376 return DidMatch; 377 pushBack(SegmentedString(consumedString)); 378 return DidNotMatch; 379 } 380 381 } -
trunk/Source/WebCore/platform/text/SegmentedString.h
r209058 r209120 1 1 /* 2 Copyright (C) 2004-20 16Apple Inc. All rights reserved.2 Copyright (C) 2004-2008, 2015 Apple Inc. All rights reserved. 3 3 4 4 This library is free software; you can redistribute it and/or … … 18 18 */ 19 19 20 #pragma once 20 #ifndef SegmentedString_h 21 #define SegmentedString_h 21 22 22 23 #include <wtf/Deque.h> 23 #include <wtf/text/ WTFString.h>24 #include <wtf/text/StringBuilder.h> 24 25 25 26 namespace WebCore { 26 27 27 // FIXME: This should not start with "k". 28 // FIXME: This is a shared tokenizer concept, not a SegmentedString concept, but this is the only common header for now. 29 constexpr LChar kEndOfFileMarker = 0; 28 class SegmentedString; 29 30 class SegmentedSubstring { 31 public: 32 SegmentedSubstring() 33 : m_length(0) 34 , m_doNotExcludeLineNumbers(true) 35 , m_is8Bit(false) 36 { 37 m_data.string16Ptr = 0; 38 } 39 40 SegmentedSubstring(const String& str) 41 : m_length(str.length()) 42 , m_doNotExcludeLineNumbers(true) 43 , m_string(str) 44 { 45 if (m_length) { 46 if (m_string.is8Bit()) { 47 m_is8Bit = true; 48 m_data.string8Ptr = m_string.characters8(); 49 } else { 50 m_is8Bit = false; 51 m_data.string16Ptr = m_string.characters16(); 52 } 53 } else 54 m_is8Bit = false; 55 } 56 57 void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;} 58 59 bool is8Bit() { return m_is8Bit; } 60 61 bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; } 62 bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; } 63 64 void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; } 65 66 int numberOfCharactersConsumed() const { return m_string.length() - m_length; } 67 68 void appendTo(StringBuilder& builder) const 69 { 70 int offset = m_string.length() - m_length; 71 72 if (!offset) { 73 if (m_length) 74 builder.append(m_string); 75 } else 76 builder.append(m_string.substring(offset, m_length)); 77 } 78 79 UChar getCurrentChar8() 80 { 81 return *m_data.string8Ptr; 82 } 83 84 UChar 
getCurrentChar16() 85 { 86 return m_data.string16Ptr ? *m_data.string16Ptr : 0; 87 } 88 89 UChar incrementAndGetCurrentChar8() 90 { 91 ASSERT(m_data.string8Ptr); 92 return *++m_data.string8Ptr; 93 } 94 95 UChar incrementAndGetCurrentChar16() 96 { 97 ASSERT(m_data.string16Ptr); 98 return *++m_data.string16Ptr; 99 } 100 101 String currentSubString(unsigned length) 102 { 103 int offset = m_string.length() - m_length; 104 return m_string.substring(offset, length); 105 } 106 107 ALWAYS_INLINE UChar getCurrentChar() 108 { 109 ASSERT(m_length); 110 if (is8Bit()) 111 return getCurrentChar8(); 112 return getCurrentChar16(); 113 } 114 115 ALWAYS_INLINE UChar incrementAndGetCurrentChar() 116 { 117 ASSERT(m_length); 118 if (is8Bit()) 119 return incrementAndGetCurrentChar8(); 120 return incrementAndGetCurrentChar16(); 121 } 122 123 public: 124 union { 125 const LChar* string8Ptr; 126 const UChar* string16Ptr; 127 } m_data; 128 int m_length; 129 130 private: 131 bool m_doNotExcludeLineNumbers; 132 bool m_is8Bit; 133 String m_string; 134 }; 30 135 31 136 class SegmentedString { 32 137 public: 33 SegmentedString() = default; 34 SegmentedString(String&&); 35 SegmentedString(const String&); 36 37 SegmentedString(SegmentedString&&) = delete; 38 SegmentedString(const SegmentedString&) = delete; 39 40 SegmentedString& operator=(SegmentedString&&); 41 SegmentedString& operator=(const SegmentedString&) = default; 138 SegmentedString() 139 : m_pushedChar1(0) 140 , m_pushedChar2(0) 141 , m_currentChar(0) 142 , m_numberOfCharactersConsumedPriorToCurrentString(0) 143 , m_numberOfCharactersConsumedPriorToCurrentLine(0) 144 , m_currentLine(0) 145 , m_closed(false) 146 , m_empty(true) 147 , m_fastPathFlags(NoFastPath) 148 , m_advanceFunc(&SegmentedString::advanceEmpty) 149 , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty) 150 { 151 } 152 153 SegmentedString(const String& str) 154 : m_pushedChar1(0) 155 , m_pushedChar2(0) 156 , m_currentString(str) 157 , m_currentChar(0) 158 , 
m_numberOfCharactersConsumedPriorToCurrentString(0) 159 , m_numberOfCharactersConsumedPriorToCurrentLine(0) 160 , m_currentLine(0) 161 , m_closed(false) 162 , m_empty(!str.length()) 163 , m_fastPathFlags(NoFastPath) 164 { 165 if (m_currentString.m_length) 166 m_currentChar = m_currentString.getCurrentChar(); 167 updateAdvanceFunctionPointers(); 168 } 169 170 SegmentedString(const SegmentedString&); 171 SegmentedString& operator=(const SegmentedString&); 42 172 43 173 void clear(); 44 174 void close(); 45 175 46 void append(SegmentedString&&);47 176 void append(const SegmentedString&); 48 49 void append(String&&); 50 void append(const String&); 51 52 void pushBack(String&&); 177 void pushBack(const SegmentedString&); 53 178 54 179 void setExcludeLineNumbers(); 55 180 56 bool isEmpty() const { return !m_currentSubstring.length; } 181 void push(UChar c) 182 { 183 if (!m_pushedChar1) { 184 m_pushedChar1 = c; 185 m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar(); 186 updateSlowCaseFunctionPointers(); 187 } else { 188 ASSERT(!m_pushedChar2); 189 m_pushedChar2 = c; 190 } 191 } 192 193 bool isEmpty() const { return m_empty; } 57 194 unsigned length() const; 58 195 59 bool isClosed() const { return m_isClosed; } 60 61 void advance(); 62 void advancePastNonNewline(); // Faster than calling advance when we know the current character is not a newline. 63 void advancePastNewline(); // Faster than calling advance when we know the current character is a newline. 
196 bool isClosed() const { return m_closed; } 64 197 65 198 enum AdvancePastResult { DidNotMatch, DidMatch, NotEnoughCharacters }; 66 template<unsigned length> AdvancePastResult advancePast(const char (&literal)[length]) { return advancePast<length, false>(literal); } 67 template<unsigned length> AdvancePastResult advancePastLettersIgnoringASCIICase(const char (&literal)[length]) { return advancePast<length, true>(literal); } 68 69 unsigned numberOfCharactersConsumed() const; 199 template<unsigned length> AdvancePastResult advancePast(const char (&literal)[length]) { return advancePast(literal, length - 1, true); } 200 template<unsigned length> AdvancePastResult advancePastIgnoringCase(const char (&literal)[length]) { return advancePast(literal, length - 1, false); } 201 202 void advance() 203 { 204 if (m_fastPathFlags & Use8BitAdvance) { 205 ASSERT(!m_pushedChar1); 206 bool haveOneCharacterLeft = (--m_currentString.m_length == 1); 207 m_currentChar = m_currentString.incrementAndGetCurrentChar8(); 208 209 if (!haveOneCharacterLeft) 210 return; 211 212 updateSlowCaseFunctionPointers(); 213 214 return; 215 } 216 217 (this->*m_advanceFunc)(); 218 } 219 220 void advanceAndUpdateLineNumber() 221 { 222 if (m_fastPathFlags & Use8BitAdvance) { 223 ASSERT(!m_pushedChar1); 224 225 bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers); 226 bool haveOneCharacterLeft = (--m_currentString.m_length == 1); 227 228 m_currentChar = m_currentString.incrementAndGetCurrentChar8(); 229 230 if (!(haveNewLine | haveOneCharacterLeft)) 231 return; 232 233 if (haveNewLine) { 234 ++m_currentLine; 235 m_numberOfCharactersConsumedPriorToCurrentLine = m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed(); 236 } 237 238 if (haveOneCharacterLeft) 239 updateSlowCaseFunctionPointers(); 240 241 return; 242 } 243 244 (this->*m_advanceAndUpdateLineNumberFunc)(); 245 } 246 247 void advancePastNonNewline() 248 { 
249 ASSERT(currentChar() != '\n'); 250 advance(); 251 } 252 253 void advancePastNewlineAndUpdateLineNumber() 254 { 255 ASSERT(currentChar() == '\n'); 256 if (!m_pushedChar1 && m_currentString.m_length > 1) { 257 int newLineFlag = m_currentString.doNotExcludeLineNumbers(); 258 m_currentLine += newLineFlag; 259 if (newLineFlag) 260 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; 261 decrementAndCheckLength(); 262 m_currentChar = m_currentString.incrementAndGetCurrentChar(); 263 return; 264 } 265 advanceAndUpdateLineNumberSlowCase(); 266 } 267 268 int numberOfCharactersConsumed() const 269 { 270 int numberOfPushedCharacters = 0; 271 if (m_pushedChar1) { 272 ++numberOfPushedCharacters; 273 if (m_pushedChar2) 274 ++numberOfPushedCharacters; 275 } 276 return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters; 277 } 70 278 71 279 String toString() const; 72 280 73 UChar currentChar acter() const { return m_currentCharacter; }281 UChar currentChar() const { return m_currentChar; } 74 282 75 283 OrdinalNumber currentColumn() const; … … 81 289 82 290 private: 83 struct Substring {84 Substring() = default;85 Substring(String&&);86 87 UChar currentCharacter() const;88 UChar currentCharacterPreIncrement();89 90 unsigned numberOfCharactersConsumed() const;91 void appendTo(StringBuilder&) const;92 93 String string;94 unsigned length { 0 };95 bool is8Bit;96 union {97 const LChar* currentCharacter8;98 const UChar* currentCharacter16;99 };100 bool doNotExcludeLineNumbers { true };101 };102 103 291 enum FastPathFlags { 104 292 NoFastPath = 0, … … 107 295 }; 108 296 109 void appendSubstring(Substring&&); 110 111 void processPossibleNewline(); 112 void startNewLine(); 113 114 void advanceWithoutUpdatingLineNumber(); 115 void advanceWithoutUpdatingLineNumber16(); 297 void append(const SegmentedSubstring&); 298 void pushBack(const SegmentedSubstring&); 299 300 void advance8(); 301 
void advance16(); 302 void advanceAndUpdateLineNumber8(); 116 303 void advanceAndUpdateLineNumber16(); 117 void advance PastSingleCharacterSubstringWithoutUpdatingLineNumber();118 void advance PastSingleCharacterSubstring();304 void advanceSlowCase(); 305 void advanceAndUpdateLineNumberSlowCase(); 119 306 void advanceEmpty(); 120 121 void updateAdvanceFunctionPointers(); 122 void updateAdvanceFunctionPointersForEmptyString(); 123 void updateAdvanceFunctionPointersForSingleCharacterSubstring(); 124 125 void decrementAndCheckLength(); 126 127 template<typename CharacterType> static bool characterMismatch(CharacterType, char, bool lettersIgnoringASCIICase); 128 template<unsigned length, bool lettersIgnoringASCIICase> AdvancePastResult advancePast(const char (&literal)[length]); 129 AdvancePastResult advancePastSlowCase(const char* literal, bool lettersIgnoringASCIICase); 130 131 Substring m_currentSubstring; 132 Deque<Substring> m_otherSubstrings; 133 134 bool m_isClosed { false }; 135 136 UChar m_currentCharacter { 0 }; 137 138 unsigned m_numberOfCharactersConsumedPriorToCurrentSubstring { 0 }; 139 unsigned m_numberOfCharactersConsumedPriorToCurrentLine { 0 }; 140 int m_currentLine { 0 }; 141 142 unsigned char m_fastPathFlags { NoFastPath }; 143 void (SegmentedString::*m_advanceWithoutUpdatingLineNumberFunction)() { &SegmentedString::advanceEmpty }; 144 void (SegmentedString::*m_advanceAndUpdateLineNumberFunction)() { &SegmentedString::advanceEmpty }; 307 void advanceSubstring(); 308 309 void updateSlowCaseFunctionPointers(); 310 311 void decrementAndCheckLength() 312 { 313 ASSERT(m_currentString.m_length > 1); 314 if (--m_currentString.m_length == 1) 315 updateSlowCaseFunctionPointers(); 316 } 317 318 void updateAdvanceFunctionPointers() 319 { 320 if ((m_currentString.m_length > 1) && !m_pushedChar1) { 321 if (m_currentString.is8Bit()) { 322 m_advanceFunc = &SegmentedString::advance8; 323 m_fastPathFlags = Use8BitAdvance; 324 if 
(m_currentString.doNotExcludeLineNumbers()) { 325 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8; 326 m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers; 327 } else 328 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8; 329 return; 330 } 331 332 m_advanceFunc = &SegmentedString::advance16; 333 m_fastPathFlags = NoFastPath; 334 if (m_currentString.doNotExcludeLineNumbers()) 335 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16; 336 else 337 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16; 338 return; 339 } 340 341 if (!m_currentString.m_length && !isComposite()) { 342 m_advanceFunc = &SegmentedString::advanceEmpty; 343 m_fastPathFlags = NoFastPath; 344 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; 345 } 346 347 updateSlowCaseFunctionPointers(); 348 } 349 350 // Writes consumed characters into consumedCharacters, which must have space for at least |count| characters. 351 void advancePastNonNewlines(unsigned count); 352 void advancePastNonNewlines(unsigned count, UChar* consumedCharacters); 353 354 AdvancePastResult advancePast(const char* literal, unsigned length, bool caseSensitive); 355 AdvancePastResult advancePastSlowCase(const char* literal, bool caseSensitive); 356 357 bool isComposite() const { return !m_substrings.isEmpty(); } 358 359 UChar m_pushedChar1; 360 UChar m_pushedChar2; 361 SegmentedSubstring m_currentString; 362 UChar m_currentChar; 363 int m_numberOfCharactersConsumedPriorToCurrentString; 364 int m_numberOfCharactersConsumedPriorToCurrentLine; 365 int m_currentLine; 366 Deque<SegmentedSubstring> m_substrings; 367 bool m_closed; 368 bool m_empty; 369 unsigned char m_fastPathFlags; 370 void (SegmentedString::*m_advanceFunc)(); 371 void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)(); 145 372 }; 146 373 147 inline SegmentedString::Substring::Substring(String&& passedString) 148 : string(WTFMove(passedString)) 149 , 
length(string.length()) 374 inline void SegmentedString::advancePastNonNewlines(unsigned count) 150 375 { 151 if (length) { 152 is8Bit = string.impl()->is8Bit(); 153 if (is8Bit) 154 currentCharacter8 = string.impl()->characters8(); 155 else 156 currentCharacter16 = string.impl()->characters16(); 157 } 376 for (unsigned i = 0; i < count; ++i) 377 advancePastNonNewline(); 158 378 } 159 379 160 inline unsigned SegmentedString::Substring::numberOfCharactersConsumed() const380 inline SegmentedString::AdvancePastResult SegmentedString::advancePast(const char* literal, unsigned length, bool caseSensitive) 161 381 { 162 return string.length() - length; 382 ASSERT(strlen(literal) == length); 383 ASSERT(!strchr(literal, '\n')); 384 if (!m_pushedChar1) { 385 if (length <= static_cast<unsigned>(m_currentString.m_length)) { 386 if (!m_currentString.currentSubString(length).startsWith(literal, caseSensitive)) 387 return DidNotMatch; 388 advancePastNonNewlines(length); 389 return DidMatch; 390 } 391 } 392 return advancePastSlowCase(literal, caseSensitive); 163 393 } 164 394 165 ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacter() const166 {167 ASSERT(length);168 return is8Bit ? *currentCharacter8 : *currentCharacter16;169 395 } 170 396 171 ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacterPreIncrement() 172 { 173 ASSERT(length); 174 return is8Bit ? 
*++currentCharacter8 : *++currentCharacter16; 175 } 176 177 inline SegmentedString::SegmentedString(String&& string) 178 : m_currentSubstring(WTFMove(string)) 179 { 180 if (m_currentSubstring.length) { 181 m_currentCharacter = m_currentSubstring.currentCharacter(); 182 updateAdvanceFunctionPointers(); 183 } 184 } 185 186 inline SegmentedString::SegmentedString(const String& string) 187 : SegmentedString(String { string }) 188 { 189 } 190 191 ALWAYS_INLINE void SegmentedString::decrementAndCheckLength() 192 { 193 ASSERT(m_currentSubstring.length > 1); 194 if (UNLIKELY(--m_currentSubstring.length == 1)) 195 updateAdvanceFunctionPointersForSingleCharacterSubstring(); 196 } 197 198 ALWAYS_INLINE void SegmentedString::advanceWithoutUpdatingLineNumber() 199 { 200 if (LIKELY(m_fastPathFlags & Use8BitAdvance)) { 201 m_currentCharacter = *++m_currentSubstring.currentCharacter8; 202 decrementAndCheckLength(); 203 return; 204 } 205 206 (this->*m_advanceWithoutUpdatingLineNumberFunction)(); 207 } 208 209 inline void SegmentedString::startNewLine() 210 { 211 ++m_currentLine; 212 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed(); 213 } 214 215 inline void SegmentedString::processPossibleNewline() 216 { 217 if (m_currentCharacter == '\n') 218 startNewLine(); 219 } 220 221 inline void SegmentedString::advance() 222 { 223 if (LIKELY(m_fastPathFlags & Use8BitAdvance)) { 224 ASSERT(m_currentSubstring.length > 1); 225 bool lastCharacterWasNewline = m_currentCharacter == '\n'; 226 m_currentCharacter = *++m_currentSubstring.currentCharacter8; 227 bool haveOneCharacterLeft = --m_currentSubstring.length == 1; 228 if (LIKELY(!(lastCharacterWasNewline | haveOneCharacterLeft))) 229 return; 230 if (lastCharacterWasNewline & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers)) 231 startNewLine(); 232 if (haveOneCharacterLeft) 233 updateAdvanceFunctionPointersForSingleCharacterSubstring(); 234 return; 235 } 236 237 
(this->*m_advanceAndUpdateLineNumberFunction)(); 238 } 239 240 ALWAYS_INLINE void SegmentedString::advancePastNonNewline() 241 { 242 ASSERT(m_currentCharacter != '\n'); 243 advanceWithoutUpdatingLineNumber(); 244 } 245 246 inline void SegmentedString::advancePastNewline() 247 { 248 ASSERT(m_currentCharacter == '\n'); 249 if (m_currentSubstring.length > 1) { 250 if (m_currentSubstring.doNotExcludeLineNumbers) 251 startNewLine(); 252 m_currentCharacter = m_currentSubstring.currentCharacterPreIncrement(); 253 decrementAndCheckLength(); 254 return; 255 } 256 257 (this->*m_advanceAndUpdateLineNumberFunction)(); 258 } 259 260 inline unsigned SegmentedString::numberOfCharactersConsumed() const 261 { 262 return m_numberOfCharactersConsumedPriorToCurrentSubstring + m_currentSubstring.numberOfCharactersConsumed(); 263 } 264 265 template<typename CharacterType> ALWAYS_INLINE bool SegmentedString::characterMismatch(CharacterType a, char b, bool lettersIgnoringASCIICase) 266 { 267 return lettersIgnoringASCIICase ? 
!isASCIIAlphaCaselessEqual(a, b) : a != b; 268 } 269 270 template<unsigned lengthIncludingTerminator, bool lettersIgnoringASCIICase> SegmentedString::AdvancePastResult SegmentedString::advancePast(const char (&literal)[lengthIncludingTerminator]) 271 { 272 constexpr unsigned length = lengthIncludingTerminator - 1; 273 ASSERT(!literal[length]); 274 ASSERT(!strchr(literal, '\n')); 275 if (length + 1 < m_currentSubstring.length) { 276 if (m_currentSubstring.is8Bit) { 277 for (unsigned i = 0; i < length; ++i) { 278 if (characterMismatch(m_currentSubstring.currentCharacter8[i], literal[i], lettersIgnoringASCIICase)) 279 return DidNotMatch; 280 } 281 m_currentSubstring.currentCharacter8 += length; 282 m_currentCharacter = *m_currentSubstring.currentCharacter8; 283 } else { 284 for (unsigned i = 0; i < length; ++i) { 285 if (characterMismatch(m_currentSubstring.currentCharacter16[i], literal[i], lettersIgnoringASCIICase)) 286 return DidNotMatch; 287 } 288 m_currentSubstring.currentCharacter16 += length; 289 m_currentCharacter = *m_currentSubstring.currentCharacter16; 290 } 291 m_currentSubstring.length -= length; 292 return DidMatch; 293 } 294 return advancePastSlowCase(literal, lettersIgnoringASCIICase); 295 } 296 297 inline void SegmentedString::updateAdvanceFunctionPointers() 298 { 299 if (m_currentSubstring.length > 1) { 300 if (m_currentSubstring.is8Bit) { 301 m_fastPathFlags = Use8BitAdvance; 302 if (m_currentSubstring.doNotExcludeLineNumbers) 303 m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers; 304 return; 305 } 306 m_fastPathFlags = NoFastPath; 307 m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16; 308 if (m_currentSubstring.doNotExcludeLineNumbers) 309 m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceAndUpdateLineNumber16; 310 else 311 m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16; 312 return; 313 } 314 315 if (!m_currentSubstring.length) { 316 
updateAdvanceFunctionPointersForEmptyString(); 317 return; 318 } 319 320 updateAdvanceFunctionPointersForSingleCharacterSubstring(); 321 } 322 323 } 397 #endif -
trunk/Source/WebCore/xml/parser/CharacterReferenceParserInlines.h
r209058 r209120 31 31 namespace WebCore { 32 32 33 inline void unconsumeCharacters(SegmentedString& source, StringBuilder& consumedCharacters)33 inline void unconsumeCharacters(SegmentedString& source, const StringBuilder& consumedCharacters) 34 34 { 35 source.pushBack( consumedCharacters.toString());35 source.pushBack(SegmentedString(consumedCharacters.toStringPreserveCapacity())); 36 36 } 37 37 … … 57 57 58 58 while (!source.isEmpty()) { 59 UChar character = source.currentChar acter();59 UChar character = source.currentChar(); 60 60 switch (state) { 61 61 case Initial: … … 86 86 goto Decimal; 87 87 } 88 source.pushBack( ASCIILiteral("#"));88 source.pushBack(SegmentedString(ASCIILiteral("#"))); 89 89 return false; 90 90 case MaybeHexLowerCaseX: … … 93 93 goto Hex; 94 94 } 95 source.pushBack( ASCIILiteral("#x"));95 source.pushBack(SegmentedString(ASCIILiteral("#x"))); 96 96 return false; 97 97 case MaybeHexUpperCaseX: … … 100 100 goto Hex; 101 101 } 102 source.pushBack( ASCIILiteral("#X"));102 source.pushBack(SegmentedString(ASCIILiteral("#X"))); 103 103 return false; 104 104 case Hex: … … 111 111 } 112 112 if (character == ';') { 113 source.advance PastNonNewline();113 source.advance(); 114 114 decodedCharacter.append(ParserFunctions::legalEntityFor(overflow ? 0 : result)); 115 115 return true; … … 130 130 } 131 131 if (character == ';') { 132 source.advance PastNonNewline();132 source.advance(); 133 133 decodedCharacter.append(ParserFunctions::legalEntityFor(overflow ? 0 : result)); 134 134 return true; … … 145 145 } 146 146 consumedCharacters.append(character); 147 source.advance PastNonNewline();147 source.advance(); 148 148 } 149 149 ASSERT(source.isEmpty()); -
trunk/Source/WebCore/xml/parser/MarkupTokenizerInlines.h
r209058 r209120 1 1 /* 2 * Copyright (C) 2008 -2016Apple Inc. All Rights Reserved.2 * Copyright (C) 2008, 2015 Apple Inc. All Rights Reserved. 3 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ 4 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. … … 28 28 #pragma once 29 29 30 #include "SegmentedString.h" 31 30 32 #if COMPILER(MSVC) 31 33 // Disable the "unreachable code" warning so we can compile the ASSERT_NOT_REACHED in the END_STATE macro. … … 43 45 case stateName: \ 44 46 stateName: { \ 45 const expr auto currentState = stateName;\47 const auto currentState = stateName; \ 46 48 UNUSED_PARAM(currentState); 47 49 … … 73 75 goto newState; \ 74 76 } while (false) 75 #define ADVANCE_PAST_NON_NEWLINE_TO(newState) \76 do { \77 if (!m_preprocessor.advancePastNonNewline(source, isNullCharacterSkippingState(newState))) { \78 m_state = newState; \79 return haveBufferedCharacterToken(); \80 } \81 character = m_preprocessor.nextInputCharacter(); \82 goto newState; \83 } while (false)84 77 85 78 // For more complex cases, caller consumes the characters first and then uses this macro. -
trunk/Source/WebCore/xml/parser/XMLDocumentParser.cpp
r209058 r209120 101 101 } 102 102 103 void XMLDocumentParser::insert( SegmentedString&&)103 void XMLDocumentParser::insert(const SegmentedString&) 104 104 { 105 105 ASSERT_NOT_REACHED(); … … 108 108 void XMLDocumentParser::append(RefPtr<StringImpl>&& inputSource) 109 109 { 110 String source { WTFMove(inputSource) }; 111 110 SegmentedString source(WTFMove(inputSource)); 112 111 if (m_sawXSLTransform || !m_sawFirstElement) 113 112 m_originalSourceForTransform.append(source); … … 121 120 } 122 121 123 doWrite(source );122 doWrite(source.toString()); 124 123 125 124 // After parsing, dispatch image beforeload events. … … 154 153 } 155 154 155 156 156 bool XMLDocumentParser::updateLeafTextNode() 157 157 { -
trunk/Source/WebCore/xml/parser/XMLDocumentParser.h
r209058 r209120 1 1 /* 2 2 * Copyright (C) 2000 Peter Kelly (pmk@post.com) 3 * Copyright (C) 2005 -2016Apple Inc. All rights reserved.3 * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved. 4 4 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org) 5 5 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) … … 30 30 #include "SegmentedString.h" 31 31 #include "XMLErrors.h" 32 #include <libxml/tree.h>33 #include <libxml/xmlstring.h>34 32 #include <wtf/HashMap.h> 35 33 #include <wtf/text/AtomicStringHash.h> 36 34 #include <wtf/text/CString.h> 35 36 #include <libxml/tree.h> 37 #include <libxml/xmlstring.h> 37 38 38 39 namespace WebCore { … … 41 42 class CachedResourceLoader; 42 43 class DocumentFragment; 44 class Document; 43 45 class Element; 44 46 class FrameView; 45 47 class PendingCallbacks; 48 class PendingScript; 46 49 class Text; 47 50 48 class XMLParserContext : public RefCounted<XMLParserContext> {49 public:50 static RefPtr<XMLParserContext> createMemoryParser(xmlSAXHandlerPtr, void* userData, const CString& chunk);51 static Ref<XMLParserContext> createStringParser(xmlSAXHandlerPtr, void* userData);52 ~XMLParserContext();53 xmlParserCtxtPtr context() const { return m_context; }51 class XMLParserContext : public RefCounted<XMLParserContext> { 52 public: 53 static RefPtr<XMLParserContext> createMemoryParser(xmlSAXHandlerPtr, void* userData, const CString& chunk); 54 static Ref<XMLParserContext> createStringParser(xmlSAXHandlerPtr, void* userData); 55 ~XMLParserContext(); 56 xmlParserCtxtPtr context() const { return m_context; } 54 57 55 private:56 XMLParserContext(xmlParserCtxtPtr context)57 : m_context(context)58 {59 }60 xmlParserCtxtPtr m_context;61 };58 private: 59 XMLParserContext(xmlParserCtxtPtr context) 60 : m_context(context) 61 { 62 } 63 xmlParserCtxtPtr m_context; 64 }; 62 65 63 class XMLDocumentParser final : public ScriptableDocumentParser, public PendingScriptClient {64 WTF_MAKE_FAST_ALLOCATED;65 public:66 static 
Ref<XMLDocumentParser> create(Document& document, FrameView* view)67 {68 return adoptRef(*new XMLDocumentParser(document, view));69 }70 static Ref<XMLDocumentParser> create(DocumentFragment& fragment, Element* element, ParserContentPolicy parserContentPolicy)71 {72 return adoptRef(*new XMLDocumentParser(fragment, element, parserContentPolicy));73 }66 class XMLDocumentParser final : public ScriptableDocumentParser, public PendingScriptClient { 67 WTF_MAKE_FAST_ALLOCATED; 68 public: 69 static Ref<XMLDocumentParser> create(Document& document, FrameView* view) 70 { 71 return adoptRef(*new XMLDocumentParser(document, view)); 72 } 73 static Ref<XMLDocumentParser> create(DocumentFragment& fragment, Element* element, ParserContentPolicy parserContentPolicy) 74 { 75 return adoptRef(*new XMLDocumentParser(fragment, element, parserContentPolicy)); 76 } 74 77 75 ~XMLDocumentParser();78 ~XMLDocumentParser(); 76 79 77 // Exposed for callbacks:78 void handleError(XMLErrors::ErrorType, const char* message, TextPosition);80 // Exposed for callbacks: 81 void handleError(XMLErrors::ErrorType, const char* message, TextPosition); 79 82 80 void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; }81 bool isXHTMLDocument() const { return m_isXHTMLDocument; }83 void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; } 84 bool isXHTMLDocument() const { return m_isXHTMLDocument; } 82 85 83 static bool parseDocumentFragment(const String&, DocumentFragment&, Element* parent = nullptr, ParserContentPolicy = AllowScriptingContent);86 static bool parseDocumentFragment(const String&, DocumentFragment&, Element* parent = nullptr, ParserContentPolicy = AllowScriptingContent); 84 87 85 // Used byXMLHttpRequest to check if the responseXML was well formed.86 bool wellFormed() const final{ return !m_sawError; }88 // Used by the XMLHttpRequest to check if the responseXML was well formed. 
89 bool wellFormed() const override { return !m_sawError; } 87 90 88 static bool supportsXMLVersion(const String&);91 static bool supportsXMLVersion(const String&); 89 92 90 private:91 explicitXMLDocumentParser(Document&, FrameView* = nullptr);92 XMLDocumentParser(DocumentFragment&, Element*, ParserContentPolicy);93 private: 94 XMLDocumentParser(Document&, FrameView* = nullptr); 95 XMLDocumentParser(DocumentFragment&, Element*, ParserContentPolicy); 93 96 94 void insert(SegmentedString&&) final; 95 void append(RefPtr<StringImpl>&&) final; 96 void finish() final; 97 bool isWaitingForScripts() const final; 98 void stopParsing() final; 99 void detach() final; 97 // From DocumentParser 98 void insert(const SegmentedString&) override; 99 void append(RefPtr<StringImpl>&&) override; 100 void finish() override; 101 bool isWaitingForScripts() const override; 102 void stopParsing() override; 103 void detach() override; 100 104 101 TextPosition textPosition() const final;102 bool shouldAssociateConsoleMessagesWithTextPosition() const final;105 TextPosition textPosition() const override; 106 bool shouldAssociateConsoleMessagesWithTextPosition() const override; 103 107 104 void notifyFinished(PendingScript&) final;108 void notifyFinished(PendingScript&) final; 105 109 106 void end();110 void end(); 107 111 108 void pauseParsing();109 void resumeParsing();112 void pauseParsing(); 113 void resumeParsing(); 110 114 111 bool appendFragmentSource(const String&);115 bool appendFragmentSource(const String&); 112 116 113 public: 114 // Callbacks from parser SAX, and other functions needed inside 115 // the parser implementation, but outside this class. 
117 public: 118 // callbacks from parser SAX 119 void error(XMLErrors::ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0); 120 void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces, 121 const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes); 122 void endElementNs(); 123 void characters(const xmlChar* s, int len); 124 void processingInstruction(const xmlChar* target, const xmlChar* data); 125 void cdataBlock(const xmlChar* s, int len); 126 void comment(const xmlChar* s); 127 void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone); 128 void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID); 129 void endDocument(); 116 130 117 void error(XMLErrors::ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0); 118 void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, 119 int numNamespaces, const xmlChar** namespaces, 120 int numAttributes, int numDefaulted, const xmlChar** libxmlAttributes); 121 void endElementNs(); 122 void characters(const xmlChar*, int length); 123 void processingInstruction(const xmlChar* target, const xmlChar* data); 124 void cdataBlock(const xmlChar*, int length); 125 void comment(const xmlChar*); 126 void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone); 127 void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID); 128 void endDocument(); 131 bool isParsingEntityDeclaration() const { return m_isParsingEntityDeclaration; } 132 void setIsParsingEntityDeclaration(bool value) { m_isParsingEntityDeclaration = value; } 129 133 130 bool isParsingEntityDeclaration() const { return m_isParsingEntityDeclaration; }131 void setIsParsingEntityDeclaration(bool value) { m_isParsingEntityDeclaration = value; }134 int depthTriggeringEntityExpansion() 
const { return m_depthTriggeringEntityExpansion; } 135 void setDepthTriggeringEntityExpansion(int depth) { m_depthTriggeringEntityExpansion = depth; } 132 136 133 int depthTriggeringEntityExpansion() const { return m_depthTriggeringEntityExpansion; }134 void setDepthTriggeringEntityExpansion(int depth) { m_depthTriggeringEntityExpansion = depth; }137 private: 138 void initializeParserContext(const CString& chunk = CString()); 135 139 136 private: 137 void initializeParserContext(const CString& chunk = CString()); 140 void pushCurrentNode(ContainerNode*); 141 void popCurrentNode(); 142 void clearCurrentNodeStack(); 138 143 139 void pushCurrentNode(ContainerNode*); 140 void popCurrentNode(); 141 void clearCurrentNodeStack(); 144 void insertErrorMessageBlock(); 142 145 143 void insertErrorMessageBlock(); 146 void createLeafTextNode(); 147 bool updateLeafTextNode(); 144 148 145 void createLeafTextNode();146 bool updateLeafTextNode();149 void doWrite(const String&); 150 void doEnd(); 147 151 148 void doWrite(const String&); 149 void doEnd(); 152 FrameView* m_view; 150 153 151 xmlParserCtxtPtr context() const { return m_context ? m_context->context() : nullptr; };154 SegmentedString m_originalSourceForTransform; 152 155 153 FrameView* m_view { nullptr }; 156 xmlParserCtxtPtr context() const { return m_context ? 
m_context->context() : nullptr; }; 157 RefPtr<XMLParserContext> m_context; 158 std::unique_ptr<PendingCallbacks> m_pendingCallbacks; 159 Vector<xmlChar> m_bufferedText; 160 int m_depthTriggeringEntityExpansion; 161 bool m_isParsingEntityDeclaration; 154 162 155 SegmentedString m_originalSourceForTransform; 163 ContainerNode* m_currentNode; 164 Vector<ContainerNode*> m_currentNodeStack; 156 165 157 RefPtr<XMLParserContext> m_context; 158 std::unique_ptr<PendingCallbacks> m_pendingCallbacks; 159 Vector<xmlChar> m_bufferedText; 160 int m_depthTriggeringEntityExpansion { -1 }; 161 bool m_isParsingEntityDeclaration { false }; 166 RefPtr<Text> m_leafTextNode; 162 167 163 ContainerNode* m_currentNode { nullptr }; 164 Vector<ContainerNode*> m_currentNodeStack; 168 bool m_sawError; 169 bool m_sawCSS; 170 bool m_sawXSLTransform; 171 bool m_sawFirstElement; 172 bool m_isXHTMLDocument; 173 bool m_parserPaused; 174 bool m_requestingScript; 175 bool m_finishCalled; 165 176 166 RefPtr<Text> m_leafTextNode;177 std::unique_ptr<XMLErrors> m_xmlErrors; 167 178 168 bool m_sawError { false }; 169 bool m_sawCSS { false }; 170 bool m_sawXSLTransform { false }; 171 bool m_sawFirstElement { false }; 172 bool m_isXHTMLDocument { false }; 173 bool m_parserPaused { false }; 174 bool m_requestingScript { false }; 175 bool m_finishCalled { false }; 179 RefPtr<PendingScript> m_pendingScript; 180 TextPosition m_scriptStartPosition; 176 181 177 std::unique_ptr<XMLErrors> m_xmlErrors; 182 bool m_parsingFragment; 183 AtomicString m_defaultNamespaceURI; 178 184 179 RefPtr<PendingScript> m_pendingScript; 180 TextPosition m_scriptStartPosition; 181 182 bool m_parsingFragment { false }; 183 AtomicString m_defaultNamespaceURI; 184 185 HashMap<AtomicString, AtomicString> m_prefixToNamespaceMap; 186 SegmentedString m_pendingSrc; 187 }; 185 typedef HashMap<AtomicString, AtomicString> PrefixForNamespaceMap; 186 PrefixForNamespaceMap m_prefixToNamespaceMap; 187 SegmentedString m_pendingSrc; 188 }; 188 189 189 
190 #if ENABLE(XSLT) -
trunk/Source/WebCore/xml/parser/XMLDocumentParserLibxml2.cpp
r209058 r209120 1 1 /* 2 2 * Copyright (C) 2000 Peter Kelly <pmk@post.com> 3 * Copyright (C) 2005 -2016Apple Inc. All rights reserved.3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved. 4 4 * Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org> 5 5 * Copyright (C) 2007 Samuel Weinig <sam@webkit.org> … … 36 36 #include "DocumentType.h" 37 37 #include "Frame.h" 38 #include "FrameLoader.h" 39 #include "FrameView.h" 38 40 #include "HTMLEntityParser.h" 39 41 #include "HTMLHtmlElement.h" 42 #include "HTMLLinkElement.h" 43 #include "HTMLNames.h" 44 #include "HTMLStyleElement.h" 40 45 #include "HTMLTemplateElement.h" 46 #include "LoadableClassicScript.h" 41 47 #include "Page.h" 42 48 #include "PendingScript.h" 43 49 #include "ProcessingInstruction.h" 44 50 #include "ResourceError.h" 51 #include "ResourceRequest.h" 45 52 #include "ResourceResponse.h" 46 53 #include "ScriptElement.h" 47 54 #include "ScriptSourceCode.h" 55 #include "SecurityOrigin.h" 48 56 #include "Settings.h" 49 57 #include "StyleScope.h" 58 #include "TextResourceDecoder.h" 50 59 #include "TransformSource.h" 51 60 #include "XMLNSNames.h" 52 61 #include "XMLDocumentParserScope.h" 53 62 #include <libxml/parserInternals.h> 63 #include <wtf/Ref.h> 54 64 #include <wtf/StringExtras.h> 65 #include <wtf/Threading.h> 66 #include <wtf/Vector.h> 55 67 #include <wtf/unicode/UTF8.h> 56 68 … … 63 75 64 76 #if ENABLE(XSLT) 65 66 static inline bool shouldRenderInXMLTreeViewerMode(Document& document) 67 { 68 if (document.sawElementsInKnownNamespaces()) 77 static inline bool hasNoStyleInformation(Document* document) 78 { 79 if (document->sawElementsInKnownNamespaces()) 69 80 return false; 70 81 71 if (document .transformSourceDocument())82 if (document->transformSourceDocument()) 72 83 return false; 73 84 74 auto* frame = document.frame(); 75 if (!frame) 85 if (!document->frame() || !document->frame()->page()) 76 86 return false; 77 87 78 if (! 
frame->settings().developerExtrasEnabled())88 if (!document->frame()->page()->settings().developerExtrasEnabled()) 79 89 return false; 80 90 81 if ( frame->tree().parent())91 if (document->frame()->tree().parent()) 82 92 return false; // This document is not in a top frame 83 93 84 94 return true; 85 95 } 86 87 96 #endif 88 97 89 98 class PendingCallbacks { 90 WTF_MAKE_ FAST_ALLOCATED;99 WTF_MAKE_NONCOPYABLE(PendingCallbacks); WTF_MAKE_FAST_ALLOCATED; 91 100 public: 101 PendingCallbacks() = default; 102 92 103 void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int numNamespaces, const xmlChar** namespaces, int numAttributes, int numDefaulted, const xmlChar** attributes) 93 104 { … … 565 576 : ScriptableDocumentParser(document) 566 577 , m_view(frameView) 578 , m_context(nullptr) 567 579 , m_pendingCallbacks(std::make_unique<PendingCallbacks>()) 580 , m_depthTriggeringEntityExpansion(-1) 581 , m_isParsingEntityDeclaration(false) 568 582 , m_currentNode(&document) 583 , m_sawError(false) 584 , m_sawCSS(false) 585 , m_sawXSLTransform(false) 586 , m_sawFirstElement(false) 587 , m_isXHTMLDocument(false) 588 , m_parserPaused(false) 589 , m_requestingScript(false) 590 , m_finishCalled(false) 569 591 , m_scriptStartPosition(TextPosition::belowRangePosition()) 592 , m_parsingFragment(false) 570 593 { 571 594 } … … 573 596 XMLDocumentParser::XMLDocumentParser(DocumentFragment& fragment, Element* parentElement, ParserContentPolicy parserContentPolicy) 574 597 : ScriptableDocumentParser(fragment.document(), parserContentPolicy) 598 , m_view(nullptr) 599 , m_context(nullptr) 575 600 , m_pendingCallbacks(std::make_unique<PendingCallbacks>()) 601 , m_depthTriggeringEntityExpansion(-1) 602 , m_isParsingEntityDeclaration(false) 576 603 , m_currentNode(&fragment) 604 , m_sawError(false) 605 , m_sawCSS(false) 606 , m_sawXSLTransform(false) 607 , m_sawFirstElement(false) 608 , m_isXHTMLDocument(false) 609 , 
m_parserPaused(false) 610 , m_requestingScript(false) 611 , m_finishCalled(false) 577 612 , m_scriptStartPosition(TextPosition::belowRangePosition()) 578 613 , m_parsingFragment(true) … … 1160 1195 { 1161 1196 const char* originalTarget = target; 1162 auto conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity, utf16Entity + numberOfCodeUnits, &target, target + targetSize); 1197 WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity, 1198 utf16Entity + numberOfCodeUnits, &target, target + targetSize); 1163 1199 if (conversionResult != WTF::Unicode::conversionOK) 1164 1200 return 0; … … 1329 1365 1330 1366 #if ENABLE(XSLT) 1331 bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && shouldRenderInXMLTreeViewerMode(*document());1367 bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document()); 1332 1368 if (xmlViewerMode) { 1333 1369 XMLTreeViewer xmlTreeViewer(*document()); … … 1415 1451 } 1416 1452 1453 // Then, write any pending data 1454 SegmentedString rest = m_pendingSrc; 1455 m_pendingSrc.clear(); 1417 1456 // There is normally only one string left, so toString() shouldn't copy. 1418 1457 // In any case, the XML parser runs on the main thread and it's OK if 1419 1458 // the passed string has more than one reference. 1420 auto rest = m_pendingSrc.toString(); 1421 m_pendingSrc.clear(); 1422 append(rest.impl()); 1459 append(rest.toString().impl()); 1423 1460 1424 1461 // Finally, if finish() has been called and write() didn't result
Note: See TracChangeset
for help on using the changeset viewer.