Changeset 73201 in webkit
- Timestamp: Dec 2, 2010 2:45:36 PM (13 years ago)
- Location: trunk
- Files: 11 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/ChangeLog
r73198 r73201 1 2010-12-02 Patrick Gansterer <paroga@webkit.org> 2 3 Reviewed by Darin Adler. 4 5 Add AtomicString::fromUTF8 6 https://bugs.webkit.org/show_bug.cgi?id=45594 7 8 Unicode::calculateStringHashFromUTF8 creates a StringHash out of UTF8 input data and 9 calculates the required length for the UTF16 conversion in one step. 10 This is then used in a specialized translator for the string table of AtomicString. 11 12 * JavaScriptCore.exp: 13 * JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def: 14 * wtf/text/AtomicString.cpp: 15 (WTF::CStringTranslator::equal): 16 (WTF::HashAndUTF8CharactersTranslator::hash): 17 (WTF::HashAndUTF8CharactersTranslator::equal): 18 (WTF::HashAndUTF8CharactersTranslator::translate): 19 (WTF::AtomicString::add): 20 (WTF::AtomicString::addSlowCase): 21 (WTF::AtomicString::find): 22 (WTF::AtomicString::fromUTF8): 23 * wtf/text/AtomicString.h: 24 * wtf/text/StringImpl.h: 25 * wtf/text/WTFString.h: 26 * wtf/unicode/UTF8.cpp: 27 (WTF::Unicode::readUTF8Sequence): 28 (WTF::Unicode::convertUTF8ToUTF16): 29 (WTF::Unicode::calculateStringHashFromUTF8): 30 (WTF::Unicode::equalUTF16WithUTF8): 31 * wtf/unicode/UTF8.h: 32 1 33 2010-12-02 Geoffrey Garen <ggaren@apple.com> 2 34 -
trunk/JavaScriptCore/JavaScriptCore.exp
r73095 r73201 374 374 __ZN3WTF12AtomicString4findEPKtjj 375 375 __ZN3WTF12AtomicString4initEv 376 __ZN3WTF12AtomicString8fromUTF8EPKc 377 __ZN3WTF12AtomicString8fromUTF8EPKcm 376 378 __ZN3WTF12createThreadEPFPvS0_ES0_ 377 379 __ZN3WTF12createThreadEPFPvS0_ES0_PKc -
trunk/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def
r73099 r73201 60 60 ?bufferLengthForStringDecimal@DecimalNumber@WTF@@QBEIXZ 61 61 ?calculateDSTOffset@WTF@@YANNN@Z 62 ?calculateStringHashFromUTF8@Unicode@WTF@@YAIPBD0AAI@Z 62 63 ?calculateUTCOffset@WTF@@YAHXZ 63 64 ?calculatedFunctionName@DebuggerCallFrame@JSC@@QBE?AVUString@2@XZ … … 140 141 ?enumerable@PropertyDescriptor@JSC@@QBE_NXZ 141 142 ?equal@Identifier@JSC@@SA_NPBVStringImpl@WTF@@PBD@Z 143 ?equalUTF16WithUTF8@Unicode@WTF@@YA_NPB_W0PBD1@Z 142 144 ?evaluate@DebuggerCallFrame@JSC@@QBE?AVJSValue@2@ABVUString@2@AAV32@@Z 143 145 ?evaluate@JSC@@YA?AVCompletion@1@PAVExecState@1@AAVScopeChain@1@ABVSourceCode@1@VJSValue@1@@Z -
trunk/JavaScriptCore/wtf/text/AtomicString.cpp
r71375 r73201 1 1 /* 2 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. 3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> 3 4 * 4 5 * This library is free software; you can redistribute it and/or … … 27 28 #include <wtf/Threading.h> 28 29 #include <wtf/WTFThreadData.h> 30 #include <wtf/unicode/UTF8.h> 29 31 30 32 namespace WTF { 33 34 using namespace Unicode; 31 35 32 36 COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size); … … 86 90 return false; 87 91 } 88 return s[length] == 0;92 return !s[length]; 89 93 } 90 94 … … 207 211 }; 208 212 213 struct HashAndUTF8Characters { 214 unsigned hash; 215 const char* characters; 216 unsigned length; 217 unsigned utf16Length; 218 }; 219 220 struct HashAndUTF8CharactersTranslator { 221 static unsigned hash(const HashAndUTF8Characters& buffer) 222 { 223 return buffer.hash; 224 } 225 226 static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer) 227 { 228 return equalUTF16WithUTF8(string->characters(), string->characters() + string->length(), buffer.characters, buffer.characters + buffer.length); 229 } 230 231 static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash) 232 { 233 UChar* target; 234 location = StringImpl::createUninitialized(buffer.utf16Length, target).releaseRef(); 235 236 const char* source = buffer.characters; 237 if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK) 238 ASSERT_NOT_REACHED(); 239 240 location->setHash(hash); 241 location->setIsAtomic(true); 242 } 243 }; 244 209 245 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length) 210 246 { … … 212 248 return 0; 213 249 214 if ( length == 0)250 if (!length) 215 251 return StringImpl::empty(); 216 252 … … 228 264 ASSERT(existingHash); 229 265 230 if ( length == 0)266 if (!length) 231 267 return StringImpl::empty(); 232 268 … … 247 283 
length++; 248 284 249 if ( length == 0)285 if (!length) 250 286 return StringImpl::empty(); 251 287 … … 263 299 return r; 264 300 265 if ( r->length() == 0)301 if (!r->length()) 266 302 return StringImpl::empty(); 267 303 … … 277 313 ASSERT(existingHash); 278 314 279 if ( length == 0)315 if (!length) 280 316 return static_cast<AtomicStringImpl*>(StringImpl::empty()); 281 317 … … 291 327 stringTable().remove(r); 292 328 } 293 329 294 330 AtomicString AtomicString::lower() const 295 331 { … … 304 340 } 305 341 306 } 342 AtomicString AtomicString::fromUTF8(const char* characters, size_t length) 343 { 344 if (!characters) 345 return AtomicString(); 346 347 if (!length) 348 return emptyAtom; 349 350 HashAndUTF8Characters buffer; 351 buffer.characters = characters; 352 buffer.length = length; 353 buffer.hash = calculateStringHashFromUTF8(characters, characters + length, buffer.utf16Length); 354 355 if (!buffer.hash) 356 return AtomicString(); 357 358 pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer); 359 360 // If the string is newly-translated, then we need to adopt it. 361 // The boolean in the pair tells us if that is so. 362 AtomicString atomicString; 363 atomicString.m_string = addResult.second ? adoptRef(*addResult.first) : *addResult.first; 364 return atomicString; 365 } 366 367 AtomicString AtomicString::fromUTF8(const char* characters) 368 { 369 if (!characters) 370 return AtomicString(); 371 return fromUTF8(characters, strlen(characters)); 372 } 373 374 } // namespace WTF -
trunk/JavaScriptCore/wtf/text/AtomicString.h
r68422 r73201 109 109 #endif 110 110 111 // AtomicString::fromUTF8 will return a null string if 112 // the input data contains invalid UTF-8 sequences. 113 static AtomicString fromUTF8(const char*, size_t); 114 static AtomicString fromUTF8(const char*); 115 111 116 private: 112 117 String m_string; -
trunk/JavaScriptCore/wtf/text/StringImpl.h
r72477 r73201 54 54 struct CStringTranslator; 55 55 struct HashAndCharactersTranslator; 56 struct HashAndUTF8CharactersTranslator; 56 57 struct UCharBufferTranslator; 57 58 … … 67 68 friend struct WTF::CStringTranslator; 68 69 friend struct WTF::HashAndCharactersTranslator; 70 friend struct WTF::HashAndUTF8CharactersTranslator; 69 71 friend struct WTF::UCharBufferTranslator; 70 72 friend class AtomicStringImpl; -
trunk/JavaScriptCore/wtf/text/WTFString.h
r70288 r73201 310 310 #endif 311 311 312 // String::fromUTF8 will return a null string if 313 // the input data contains invalid UTF-8 sequences. 312 314 static String fromUTF8(const char*, size_t); 313 315 static String fromUTF8(const char*); -
trunk/JavaScriptCore/wtf/unicode/UTF8.cpp
r72979 r73201 1 1 /* 2 2 * Copyright (C) 2007 Apple Inc. All rights reserved. 3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> 3 4 * 4 5 * Redistribution and use in source and binary forms, with or without … … 26 27 #include "config.h" 27 28 #include "UTF8.h" 29 #include <wtf/StringHasher.h> 28 30 29 31 #include "ASCIICType.h" … … 33 35 34 36 // FIXME: Use definition from CharacterNames.h. 35 const UChar replacementCharacter = 0xFFFD;37 static const UChar replacementCharacter = 0xFFFD; 36 38 37 39 inline int inlineUTF8SequenceLengthNonASCII(char b0) … … 315 317 } 316 318 319 unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length) 320 { 321 if (!data) 322 return 0; 323 324 WTF::StringHasher stringHasher; 325 utf16Length = 0; 326 327 while (data < dataEnd) { 328 if (isASCII(*data)) { 329 stringHasher.addCharacter(*data++); 330 utf16Length++; 331 continue; 332 } 333 334 int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*data); 335 336 if (dataEnd - data < utf8SequenceLength) 337 return false; 338 339 if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(data), utf8SequenceLength)) 340 return 0; 341 342 UChar32 character = readUTF8Sequence(data, utf8SequenceLength); 343 ASSERT(!isASCII(character)); 344 345 if (U_IS_BMP(character)) { 346 // UTF-16 surrogate values are illegal in UTF-32 347 if (U_IS_SURROGATE(character)) 348 return 0; 349 stringHasher.addCharacter(static_cast<UChar>(character)); // normal case 350 utf16Length++; 351 } else if (U_IS_SUPPLEMENTARY(character)) { 352 stringHasher.addCharacters(static_cast<UChar>(U16_LEAD(character)), 353 static_cast<UChar>(U16_TRAIL(character))); 354 utf16Length += 2; 355 } else 356 return 0; 357 } 358 359 return stringHasher.hash(); 360 } 361 362 bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd) 363 { 364 while (b < bEnd) { 365 if (isASCII(*b)) { 366 if (*a++ != *b++) 367 return false; 368 continue; 369 } 370 371 int 
utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*b); 372 373 if (bEnd - b < utf8SequenceLength) 374 return false; 375 376 if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(b), utf8SequenceLength)) 377 return 0; 378 379 UChar32 character = readUTF8Sequence(b, utf8SequenceLength); 380 ASSERT(!isASCII(character)); 381 382 if (U_IS_BMP(character)) { 383 // UTF-16 surrogate values are illegal in UTF-32 384 if (U_IS_SURROGATE(character)) 385 return false; 386 if (*a++ != character) 387 return false; 388 } else if (U_IS_SUPPLEMENTARY(character)) { 389 if (*a++ != U16_LEAD(character)) 390 return false; 391 if (*a++ != U16_TRAIL(character)) 392 return false; 393 } else 394 return false; 395 } 396 397 return a == aEnd; 398 } 399 317 400 } // namespace Unicode 318 401 } // namespace WTF -
trunk/JavaScriptCore/wtf/unicode/UTF8.h
r27810 r73201 30 30 31 31 namespace WTF { 32 32 namespace Unicode { 33 33 34 34 // Given a first byte, gives the length of the UTF-8 sequence it begins. … … 70 70 const UChar** sourceStart, const UChar* sourceEnd, 71 71 char** targetStart, char* targetEnd, bool strict = true); 72 } 73 } 72 73 unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length); 74 75 bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd); 76 77 } // namespace Unicode 78 } // namespace WTF 74 79 75 80 #endif // WTF_UTF8_h -
trunk/WebCore/ChangeLog
r73197 r73201 1 2010-12-02 Patrick Gansterer <paroga@webkit.org> 2 3 Reviewed by Darin Adler. 4 5 Add AtomicString::fromUTF8 6 https://bugs.webkit.org/show_bug.cgi?id=45594 7 8 Use AtomicString::fromUTF8 directly in the libxml2 parser. 9 10 * dom/XMLDocumentParserLibxml2.cpp: 11 (WebCore::toAtomicString): 12 1 13 2010-12-02 Andy Estes <aestes@apple.com> 2 14 -
trunk/WebCore/dom/XMLDocumentParserLibxml2.cpp
r71895 r73201 687 687 static inline AtomicString toAtomicString(const xmlChar* string, size_t size) 688 688 { 689 // FIXME: Use AtomicString::fromUTF8. 690 return AtomicString(toString(string, size)); 689 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), size); 691 690 } 692 691 693 692 static inline AtomicString toAtomicString(const xmlChar* string) 694 693 { 695 // FIXME: Use AtomicString::fromUTF8. 696 return AtomicString(toString(string)); 694 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string)); 697 695 } 698 696
Note: See TracChangeset for help on using the changeset viewer.