Changeset 140610 in webkit
- Timestamp:
- Jan 23, 2013 4:26:32 PM (11 years ago)
- Location:
- trunk
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/ChangeLog
r140608 r140610 1 2013-01-23 Martin Robinson <mrobinson@igalia.com> 2 3 WebKit should support decoding multi-byte entities in XML content 4 https://bugs.webkit.org/show_bug.cgi?id=107459 5 6 Reviewed by Adam Barth. 7 8 Add a simple test for decoding some entities that resolve to multiple 9 bytes in XML. 10 11 * fast/parser/entities-in-xhtml.xhtml: Added a few multi-byte entities. 12 * fast/parser/entities-in-xhtml-expected.txt: Updated. 13 1 14 2013-01-23 Filip Pizlo <fpizlo@apple.com> 2 15 -
trunk/LayoutTests/fast/parser/entities-in-xhtml-expected.txt
r65355 r140610 267 267 9829 2665 hearts ♥ ♥ ♥ 268 268 9830 2666 diams ♦ ♦ ♦ 269 Multi-byte entities from HTML5 270 271 decimal hexadecimal entity name &#nnn; &#xhhh; &entity; 272 8882 22B2 vltri ⊲ ⊲ ⊲ 273 8834 + 8402 2282 + 20D2 sub ⊂⃒ ⊂⃒ ⊂⃒ 274 8804 + 8402 2264 + 20D2 sub ≤⃒ ≤⃒ ≤⃒ -
trunk/LayoutTests/fast/parser/entities-in-xhtml.xhtml
r124410 r140610 333 333 <tr><td>9829</td><td>2665</td><td>hearts</td><td>♥</td><td>♥</td><td>♥</td></tr> 334 334 <tr><td>9830</td><td>2666</td><td>diams</td><td>♦</td><td>♦</td><td>♦</td></tr> 335 336 </tbody> 337 </table> 338 339 <h2>Multi-byte entities from HTML5</h2> 340 <table border="1" summary="this table lists decimal value, hexadecimal value, 341 entity name and respective entity references of XHTML entities."> 342 <thead> 343 <tr> 344 <th>decimal</th> 345 <th>hexadecimal</th> 346 <th>entity name</th> 347 <th>&#nnn;</th> 348 <th>&#xhhh;</th> 349 <th>&entity;</th> 350 </tr> 351 </thead> 352 <tbody> 353 <tr><td>8882</td><td>22B2</td><td>vltri</td><td>⊲</td><td>⊲</td><td>⊲</td></tr> 354 <tr><td>8834 + 8402</td><td>2282 + 20D2</td><td>sub</td><td>⊂⃒</td><td>⊂⃒</td><td>⊂⃒</td></tr> 355 <tr><td>8804 + 8402</td><td>2264 + 20D2</td><td>sub</td><td>≤⃒</td><td>≤⃒</td><td>≤⃒</td></tr> 335 356 </tbody> 336 357 </table> -
trunk/Source/WebCore/ChangeLog
r140609 r140610 1 2013-01-23 Martin Robinson <mrobinson@igalia.com> 2 3 WebKit should support decoding multi-byte entities in XML content 4 https://bugs.webkit.org/show_bug.cgi?id=107459 5 6 Reviewed by Adam Barth. 7 8 Test: fast/parser/entities-in-xhtml.xhtml 9 10 * html/parser/HTMLEntityParser.cpp: 11 (WebCore::appendUChar32ToUCharArray): Added this helper function. Later patches 12 may try to move this code to somewhere that it can be shared more easily. 13 (WebCore::decodeNamedEntityToUCharArray): Modify this function to work on a UChar 14 array four elements long, so that multi-byte and multi-character entities can be resolved. 15 * html/parser/HTMLEntityParser.h: Updated function declaration. 16 * xml/parser/XMLDocumentParserLibxml2.cpp: 17 (WebCore): Modify the statically allocated entity string memory area to accommodate 18 up to two UTF-8 characters. Each UTF-8 character can be 4 bytes, so this brings the 19 total size to 9 bytes. 20 (WebCore::getXHTMLEntity): Use the new entity decoding API. 21 * xml/parser/XMLDocumentParserQt.cpp: 22 (WebCore::EntityResolver::resolveUndeclaredEntity): Ditto. 23 (WebCore::XMLDocumentParser::parse): Ditto. 24 1 25 2013-01-23 Eric Seidel <eric@webkit.org> 2 26 -
trunk/Source/WebCore/html/parser/HTMLEntityParser.cpp
r133995 r140610 141 141 } 142 142 143 UChar decodeNamedEntity(const char* name) 143 static size_t appendUChar32ToUCharArray(UChar32 value, UChar* result) 144 { 145 if (U_IS_BMP(value)) { 146 UChar character = static_cast<UChar>(value); 147 ASSERT(character == value); 148 result[0] = character; 149 return 1; 150 } 151 152 result[0] = U16_LEAD(value); 153 result[1] = U16_TRAIL(value); 154 return 2; 155 } 156 157 size_t decodeNamedEntityToUCharArray(const char* name, UChar result[4]) 144 158 { 145 159 HTMLEntitySearch search; … … 153 167 return 0; 154 168 155 UChar32 firstValue = search.mostRecentMatch()->firstValue; 156 if (U16_LENGTH(firstValue) != 1 || search.mostRecentMatch()->secondValue) { 157 // FIXME: Callers need to move off this API. Not all entities can be 158 // represented in a single UChar! 159 return 0; 160 } 161 return static_cast<UChar>(firstValue); 169 size_t numberOfCodePoints = appendUChar32ToUCharArray(search.mostRecentMatch()->firstValue, result); 170 if (!search.mostRecentMatch()->secondValue) 171 return numberOfCodePoints; 172 return numberOfCodePoints + appendUChar32ToUCharArray(search.mostRecentMatch()->secondValue, result + numberOfCodePoints); 162 173 } 163 174 -
trunk/Source/WebCore/html/parser/HTMLEntityParser.h
r103246 r140610 35 35 36 36 // Used by the XML parser. Not suitable for use in HTML parsing. Use consumeHTMLEntity instead. 37 // FIXME: Move the XML parser to an entity decoding function works for non-BMP characters! 38 UChar decodeNamedEntity(const char*); 37 size_t decodeNamedEntityToUCharArray(const char*, UChar result[4]); 39 38 40 39 } -
trunk/Source/WebCore/xml/parser/XMLDocumentParserLibxml2.cpp
r139111 r140610 63 63 #include <wtf/UnusedParam.h> 64 64 #include <wtf/Vector.h> 65 #include <wtf/unicode/UTF8.h> 65 66 66 67 #if ENABLE(XSLT) … … 1161 1162 // a hack to avoid malloc/free. Using a global variable like this could cause trouble 1162 1163 // if libxml implementation details were to change 1163 static xmlChar sharedXHTMLEntityResult[ 5] = {0, 0, 0, 0, 0};1164 static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; 1164 1165 1165 1166 static xmlEntityPtr sharedXHTMLEntity() … … 1175 1176 } 1176 1177 1178 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize) 1179 { 1180 const char* originalTarget = target; 1181 WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity, 1182 utf16Entity + numberOfCodeUnits, &target, target + targetSize); 1183 if (conversionResult != WTF::Unicode::conversionOK) 1184 return 0; 1185 1186 // Even though we must pass the length, libxml expects the entity string to be null terminated. 
1187 ASSERT(target > originalTarget + 1); 1188 *target = '\0'; 1189 return target - originalTarget; 1190 } 1191 1177 1192 static xmlEntityPtr getXHTMLEntity(const xmlChar* name) 1178 1193 { 1179 UChar c = decodeNamedEntity(reinterpret_cast<const char*>(name)); 1180 if (!c) 1194 UChar utf16DecodedEntity[4]; 1195 size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity); 1196 if (!numberOfCodeUnits) 1181 1197 return 0; 1182 1198 1183 CString value = String(&c, 1).utf8(); 1184 ASSERT(value.length() < 5); 1199 ASSERT(numberOfCodeUnits <= 4); 1200 size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits, 1201 reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult)); 1202 if (!entityLengthInUTF8) 1203 return 0; 1204 1185 1205 xmlEntityPtr entity = sharedXHTMLEntity(); 1186 entity->length = value.length();1206 entity->length = entityLengthInUTF8; 1187 1207 entity->name = name; 1188 memcpy(sharedXHTMLEntityResult, value.data(), entity->length + 1);1189 1190 1208 return entity; 1191 1209 } -
trunk/Source/WebCore/xml/parser/XMLDocumentParserQt.cpp
r139111 r140610 68 68 }; 69 69 70 static QString decodeNamedEntity(const QString& entityName) 71 { 72 UChar utf16DecodedEntity[4]; 73 size_t numberOfCodePoints = decodeNamedEntityToUCharArray(entityName.toUtf8().constData(), utf16DecodedEntity); 74 return QString::fromUTF16(utf16DecodedEntity, numberOfCodePoints); 75 } 76 70 77 QString EntityResolver::resolveUndeclaredEntity(const QString &name) 71 78 { 72 UChar c = decodeNamedEntity(name.toUtf8().constData()); 73 return QString(c); 79 return decodeNamedEntity(name); 74 80 } 75 81 … … 395 401 if (isXHTMLDocument()) { 396 402 QString entity = m_stream.name().toString(); 397 UChar c = decodeNamedEntity(entity.toUtf8().constData());398 403 if (!m_leafTextNode) 399 404 enterText(); 400 405 ExceptionCode ec = 0; 401 String str(&c, 1);402 406 // qDebug()<<" ------- adding entity "<<str; 403 m_leafTextNode->appendData( str, ec);407 m_leafTextNode->appendData(decodeNamedEntity(entity), ec); 404 408 } 405 409 break;
Note: See TracChangeset
for help on using the changeset viewer.