Changeset 133529 in webkit
- Timestamp:
- Nov 5, 2012 2:39:10 PM (11 years ago)
- Location:
- trunk
- Files:
-
- 70 added
- 10 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/ChangeLog
r133527 r133529 1 2012-11-05 Glenn Adams <glenn@skynav.com> 2 3 Add support to -webkit-line-break property for CSS3 Text line-break property values and semantics. 4 https://bugs.webkit.org/show_bug.cgi?id=89235 5 6 Reviewed by Eric Seidel. 7 8 See also wiki documentation at: 9 [1] http://trac.webkit.org/wiki/LineBreaking 10 [2] http://trac.webkit.org/wiki/LineBreakingCSS3Mapping 11 12 * css3/line-break/line-break-auto-centered-expected.html: Added. 13 * css3/line-break/line-break-auto-centered.html: Added. 14 * css3/line-break/line-break-auto-half-kana-expected.html: Added. 15 * css3/line-break/line-break-auto-half-kana.html: Added. 16 * css3/line-break/line-break-auto-hyphens-expected.html: Added. 17 * css3/line-break/line-break-auto-hyphens.html: Added. 18 * css3/line-break/line-break-auto-inseparables-expected.html: Added. 19 * css3/line-break/line-break-auto-inseparables.html: Added. 20 * css3/line-break/line-break-auto-iteration-marks-expected.html: Added. 21 * css3/line-break/line-break-auto-iteration-marks.html: Added. 22 * css3/line-break/line-break-auto-postfixes-expected.html: Added. 23 * css3/line-break/line-break-auto-postfixes.html: Added. 24 * css3/line-break/line-break-auto-prefixes-expected.html: Added. 25 * css3/line-break/line-break-auto-prefixes.html: Added. 26 * css3/line-break/line-break-auto-sound-marks-expected.html: Added. 27 * css3/line-break/line-break-auto-sound-marks.html: Added. 28 * css3/line-break/line-break-loose-centered-expected.html: Added. 29 * css3/line-break/line-break-loose-centered.html: Added. 30 * css3/line-break/line-break-loose-half-kana-expected.html: Added. 31 * css3/line-break/line-break-loose-half-kana.html: Added. 32 * css3/line-break/line-break-loose-hyphens-expected.html: Added. 33 * css3/line-break/line-break-loose-hyphens.html: Added. 34 * css3/line-break/line-break-loose-inseparables-expected.html: Added. 35 * css3/line-break/line-break-loose-inseparables.html: Added. 
36 * css3/line-break/line-break-loose-iteration-marks-expected.html: Added. 37 * css3/line-break/line-break-loose-iteration-marks.html: Added. 38 * css3/line-break/line-break-loose-postfixes-expected.html: Added. 39 * css3/line-break/line-break-loose-postfixes.html: Added. 40 * css3/line-break/line-break-loose-prefixes-expected.html: Added. 41 * css3/line-break/line-break-loose-prefixes.html: Added. 42 * css3/line-break/line-break-loose-sound-marks-expected.html: Added. 43 * css3/line-break/line-break-loose-sound-marks.html: Added. 44 * css3/line-break/line-break-normal-centered-expected.html: Added. 45 * css3/line-break/line-break-normal-centered.html: Added. 46 * css3/line-break/line-break-normal-half-kana-expected.html: Added. 47 * css3/line-break/line-break-normal-half-kana.html: Added. 48 * css3/line-break/line-break-normal-hyphens-expected.html: Added. 49 * css3/line-break/line-break-normal-hyphens.html: Added. 50 * css3/line-break/line-break-normal-inseparables-expected.html: Added. 51 * css3/line-break/line-break-normal-inseparables.html: Added. 52 * css3/line-break/line-break-normal-iteration-marks-expected.html: Added. 53 * css3/line-break/line-break-normal-iteration-marks.html: Added. 54 * css3/line-break/line-break-normal-postfixes-expected.html: Added. 55 * css3/line-break/line-break-normal-postfixes.html: Added. 56 * css3/line-break/line-break-normal-prefixes-expected.html: Added. 57 * css3/line-break/line-break-normal-prefixes.html: Added. 58 * css3/line-break/line-break-normal-sound-marks-expected.html: Added. 59 * css3/line-break/line-break-normal-sound-marks.html: Added. 60 * css3/line-break/line-break-strict-centered-expected.html: Added. 61 * css3/line-break/line-break-strict-centered.html: Added. 62 * css3/line-break/line-break-strict-half-kana-expected.html: Added. 63 * css3/line-break/line-break-strict-half-kana.html: Added. 64 * css3/line-break/line-break-strict-hyphens-expected.html: Added. 
65 * css3/line-break/line-break-strict-hyphens.html: Added. 66 * css3/line-break/line-break-strict-inseparables-expected.html: Added. 67 * css3/line-break/line-break-strict-inseparables.html: Added. 68 * css3/line-break/line-break-strict-iteration-marks-expected.html: Added. 69 * css3/line-break/line-break-strict-iteration-marks.html: Added. 70 * css3/line-break/line-break-strict-postfixes-expected.html: Added. 71 * css3/line-break/line-break-strict-postfixes.html: Added. 72 * css3/line-break/line-break-strict-prefixes-expected.html: Added. 73 * css3/line-break/line-break-strict-prefixes.html: Added. 74 * css3/line-break/line-break-strict-sound-marks-expected.html: Added. 75 * css3/line-break/line-break-strict-sound-marks.html: Added. 76 77 * platform/chromium/css3/line-break/line-break-auto-half-kana-expected.html: Added. 78 * platform/chromium/css3/line-break/line-break-auto-sound-marks-expected.html: Added. 79 Override reftest expectations on chromium due to different ICU auto behavior. 80 81 * platform/chromium-android/css3/line-break/line-break-auto-half-kana-expected.html: Added. 82 * platform/chromium-android/css3/line-break/line-break-auto-sound-marks-expected.html: Added. 83 Override override of reftest expectations on chromium due to different ICU auto behavior; 84 that is, chromium-android seems to follow the generic expectations. 85 1 86 2012-11-05 Adam Barth <abarth@webkit.org> 2 87 -
trunk/Source/WebCore/ChangeLog
r133527 r133529 1 2012-11-05 Glenn Adams <glenn@skynav.com> 2 3 Add support to -webkit-line-break property for CSS3 Text line-break property values and semantics. 4 https://bugs.webkit.org/show_bug.cgi?id=89235 5 6 Reviewed by Eric Seidel. 7 8 See also wiki documentation at: 9 [1] http://trac.webkit.org/wiki/LineBreaking 10 [2] http://trac.webkit.org/wiki/LineBreakingCSS3Mapping 11 12 Web exposed changes include: 13 (1) The default (initial) value for -webkit-line-break becomes 'auto', instead of 'normal'; 14 (2) The values 'auto', 'loose', 'normal', and 'strict' are added to -webkit-line-break; 15 (3) See [2] above for details regarding interpretation. 16 17 Tests: css3/line-break/line-break-auto-centered.html 18 css3/line-break/line-break-auto-half-kana.html 19 css3/line-break/line-break-auto-hyphens.html 20 css3/line-break/line-break-auto-inseparables.html 21 css3/line-break/line-break-auto-iteration-marks.html 22 css3/line-break/line-break-auto-postfixes.html 23 css3/line-break/line-break-auto-prefixes.html 24 css3/line-break/line-break-auto-sound-marks.html 25 css3/line-break/line-break-loose-centered.html 26 css3/line-break/line-break-loose-half-kana.html 27 css3/line-break/line-break-loose-hyphens.html 28 css3/line-break/line-break-loose-inseparables.html 29 css3/line-break/line-break-loose-iteration-marks.html 30 css3/line-break/line-break-loose-postfixes.html 31 css3/line-break/line-break-loose-prefixes.html 32 css3/line-break/line-break-loose-sound-marks.html 33 css3/line-break/line-break-normal-centered.html 34 css3/line-break/line-break-normal-half-kana.html 35 css3/line-break/line-break-normal-hyphens.html 36 css3/line-break/line-break-normal-inseparables.html 37 css3/line-break/line-break-normal-iteration-marks.html 38 css3/line-break/line-break-normal-postfixes.html 39 css3/line-break/line-break-normal-prefixes.html 40 css3/line-break/line-break-normal-sound-marks.html 41 css3/line-break/line-break-strict-centered.html 42 
css3/line-break/line-break-strict-half-kana.html 43 css3/line-break/line-break-strict-hyphens.html 44 css3/line-break/line-break-strict-inseparables.html 45 css3/line-break/line-break-strict-iteration-marks.html 46 css3/line-break/line-break-strict-postfixes.html 47 css3/line-break/line-break-strict-prefixes.html 48 css3/line-break/line-break-strict-sound-marks.html 49 50 * platform/text/LineBreakIteratorPoolICU.h: 51 (WebCore::LineBreakIteratorPool::makeLocaleWithBreakKeyword): 52 Add static function to construct ICU locale argument (also used as pool key) with additional 53 break keyword. 54 (WebCore::LineBreakIteratorPool::take): 55 (WebCore::LineBreakIteratorPool::put): 56 (LineBreakIteratorPool): 57 Remove direct dependency from ICU library (and types), moving that dependency into 58 new {open,close}LineBreakIterator() functions (defined in TextBreakIteratorICU.cpp). 59 Update to take line break mode into account. 60 Create (and cache) different break iterators depending on line break mode (in addition to locale), 61 which entails expanding pool entry key format to optionally append "@break=" + 62 "loose"|"normal"|"strict" keyword to locale string. 63 64 * platform/text/TextBreakIterator.h: 65 (WebCore::LazyLineBreakIterator::LazyLineBreakIterator): 66 (WebCore::LazyLineBreakIterator::isLooseCJKMode): 67 (WebCore::LazyLineBreakIterator::get): 68 (WebCore::LazyLineBreakIterator::reset): 69 (LazyLineBreakIterator): 70 Define LineBreakIteratorMode enumeration for use in TextBreakIterator et al. 71 Add state member to indicate line break mode. 72 73 * platform/text/TextBreakIteratorICU.cpp: 74 (WebCore::acquireLineBreakIterator): 75 Use new line break mode when making iterator from pool. 76 Handle change of return type of LineBreakIteratorPool::take() to non-ICU type, 77 i.e., TextBreakIterator* instead of ICU's UBreakIterator*. 
78 (WebCore::releaseLineBreakIterator): 79 Handle change of parameter type of LineBreakIteratorPool::put() to non-ICU type, 80 i.e., TextBreakIterator* instead of ICU's UBreakIterator*. 81 (WebCore::isCJKLocale): 82 New functions for determining if CJK rules apply. 83 (WebCore::openLineBreakIterator): 84 New function for abstracting opening of ICU style line break iterator. This is now 85 used in LineBreakIteratorPoolICU.h rather than having direct ICU API dependency there. 86 This function also takes into account the line break mode. 87 (WebCore::closeLineBreakIterator): 88 (WebCore::mapLineIteratorModeToRules): 89 New function for abstracting closing of ICU style line break iterator. This is now 90 used in LineBreakIteratorPoolICU.h rather than having direct ICU API dependency there. 91 92 * rendering/RenderBlockLineLayout.cpp: 93 (WebCore::RenderBlock::LineBreaker::nextLineBreak): 94 Pass line break iterator mode flag when resetting LazyLineBreakIterator. 95 Add looseMode local variable to prevent need for computing under isBreakable(). 96 97 * rendering/RenderText.cpp: 98 (WebCore::mapLineBreakToIteratorMode): 99 Add implementation for mapLineBreakToIteratorMode(), used by both RenderText::computePreferredLogicalWidths 100 and RenderBlock::LineBreaker::nextLineBreak. 101 (WebCore::RenderText::computePreferredLogicalWidths): 102 Ensure (lazy line) breakIterator is initialized for line break mode. 103 Ensure isBreakable() is passed loose mode flag to match behavior in RenderBlock::LineBreaker::nextLineBreak. 104 105 * rendering/RenderText.h: 106 (WebCore): 107 Add declaration for mapLineBreakToIteratorMode(), used by both RenderText::computePreferredLogicalWidths 108 and RenderBlock::LineBreaker::nextLineBreak. 109 110 * rendering/break_lines.cpp: 111 (WebCore): 112 Introduce two (local) enums NBSPBehavior and LooseBehavior for expanding template on nextBreakablePosition 113 to include loose mode parameter. 
114 (WebCore::isBreakableSpace): 115 Add externally specified loose mode parameter to prevent need to invoke line break iterator 116 accessor method on each invocation. Use new loose mode flavors of NBP functions. 117 (WebCore::needsLineBreakIterator): 118 Introduce loose mode behavior template parameter to optimize loose mode behavior code path in order 119 to prevent regression to non loose mode path. 120 (WebCore::nextBreakablePosition): 121 (WebCore::nextBreakablePositionIgnoringNBSP): 122 Use new template parameter enums described above. 123 (WebCore::nextBreakablePositionIgnoringNBSPLoose): 124 (WebCore::nextBreakablePositionLoose): 125 Introduce two additional 'loose' mode flavors of NBP template expansions. 126 127 * rendering/break_lines.h: 128 (WebCore): 129 (WebCore::isBreakable): 130 Add externally specified loose mode parameter to prevent need to invoke line break iterator 131 accessor method on each invocation. 132 1 133 2012-11-05 Adam Barth <abarth@webkit.org> 2 134 -
trunk/Source/WebCore/platform/text/LineBreakIteratorPoolICU.h
r133386 r133529 27 27 #define LineBreakIteratorPoolICU_h 28 28 29 #include "TextBreakIterator.h" 29 30 #include "TextBreakIteratorInternalICU.h" 30 #include <unicode/ubrk.h>31 31 #include <wtf/Assertions.h> 32 32 #include <wtf/HashMap.h> … … 35 35 #include <wtf/text/AtomicString.h> 36 36 #include <wtf/text/CString.h> 37 #include <wtf/text/StringBuilder.h> 37 38 38 39 namespace WebCore { … … 49 50 static PassOwnPtr<LineBreakIteratorPool> create() { return adoptPtr(new LineBreakIteratorPool); } 50 51 51 UBreakIterator* take(const AtomicString& locale)52 static String makeLocaleWithBreakKeyword(const AtomicString& locale, LineBreakIteratorMode mode) 52 53 { 53 UBreakIterator* iterator = 0; 54 StringBuilder localeWithKeyword; 55 localeWithKeyword.append(locale); 56 localeWithKeyword.appendLiteral("@break="); 57 switch (mode) { 58 case LineBreakIteratorModeUAX14: 59 break; 60 case LineBreakIteratorModeUAX14Loose: 61 localeWithKeyword.appendLiteral("loose"); 62 break; 63 case LineBreakIteratorModeUAX14Normal: 64 localeWithKeyword.appendLiteral("normal"); 65 break; 66 case LineBreakIteratorModeUAX14Strict: 67 localeWithKeyword.appendLiteral("strict"); 68 break; 69 } 70 return localeWithKeyword.toString(); 71 } 72 73 TextBreakIterator* take(const AtomicString& locale, LineBreakIteratorMode mode, bool isCJK) 74 { 75 AtomicString localeWithOptionalBreakKeyword; 76 if (mode == LineBreakIteratorModeUAX14) 77 localeWithOptionalBreakKeyword = locale; 78 else 79 localeWithOptionalBreakKeyword = makeLocaleWithBreakKeyword(locale, mode); 80 81 TextBreakIterator* iterator = 0; 54 82 for (size_t i = 0; i < m_pool.size(); ++i) { 55 if (m_pool[i].first == locale ) {83 if (m_pool[i].first == localeWithOptionalBreakKeyword) { 56 84 iterator = m_pool[i].second; 57 85 m_pool.remove(i); … … 61 89 62 90 if (!iterator) { 63 UErrorCode openStatus = U_ZERO_ERROR; 64 bool localeIsEmpty = locale.isEmpty(); 65 iterator = ubrk_open(UBRK_LINE, localeIsEmpty ? 
currentTextBreakLocaleID() : locale.string().utf8().data(), 0, 0, &openStatus); 66 // locale comes from a web page and it can be invalid, leading ICU 67 // to fail, in which case we fall back to the default locale. 68 if (!localeIsEmpty && U_FAILURE(openStatus)) { 69 openStatus = U_ZERO_ERROR; 70 iterator = ubrk_open(UBRK_LINE, currentTextBreakLocaleID(), 0, 0, &openStatus); 71 } 72 73 if (U_FAILURE(openStatus)) { 74 LOG_ERROR("ubrk_open failed with status %d", openStatus); 91 iterator = openLineBreakIterator(localeWithOptionalBreakKeyword, mode, isCJK); 92 if (!iterator) 75 93 return 0; 76 }77 94 } 78 95 79 96 ASSERT(!m_vendedIterators.contains(iterator)); 80 m_vendedIterators.set(iterator, locale );97 m_vendedIterators.set(iterator, localeWithOptionalBreakKeyword); 81 98 return iterator; 82 99 } 83 100 84 void put( UBreakIterator* iterator)101 void put(TextBreakIterator* iterator) 85 102 { 86 103 ASSERT_ARG(iterator, m_vendedIterators.contains(iterator)); 87 104 88 105 if (m_pool.size() == capacity) { 89 ubrk_close(m_pool[0].second);106 closeLineBreakIterator(m_pool[0].second); 90 107 m_pool.remove(0); 91 108 } … … 99 116 static const size_t capacity = 4; 100 117 101 typedef pair<AtomicString, UBreakIterator*> Entry;118 typedef pair<AtomicString, TextBreakIterator*> Entry; 102 119 typedef Vector<Entry, capacity> Pool; 103 120 Pool m_pool; 104 HashMap< UBreakIterator*, AtomicString> m_vendedIterators;121 HashMap<TextBreakIterator*, AtomicString> m_vendedIterators; 105 122 106 123 friend WTF::ThreadSpecific<LineBreakIteratorPool>::operator LineBreakIteratorPool*(); -
trunk/Source/WebCore/platform/text/TextBreakIterator.h
r133386 r133529 32 32 // Note: The returned iterator is good only until you get another iterator, with the exception of acquireLineBreakIterator. 33 33 34 enum LineBreakIteratorMode { 35 LineBreakIteratorModeUAX14, 36 LineBreakIteratorModeUAX14Loose, 37 LineBreakIteratorModeUAX14Normal, 38 LineBreakIteratorModeUAX14Strict, 39 }; 40 34 41 // This is similar to character break iterator in most cases, but is subject to 35 42 // platform UI conventions. One notable example where this can be different … … 39 46 40 47 TextBreakIterator* wordBreakIterator(const UChar*, int length); 41 TextBreakIterator* acquireLineBreakIterator(const LChar*, int length, const AtomicString& locale );42 TextBreakIterator* acquireLineBreakIterator(const UChar*, int length, const AtomicString& locale );48 TextBreakIterator* acquireLineBreakIterator(const LChar*, int length, const AtomicString& locale, LineBreakIteratorMode, bool isCJK); 49 TextBreakIterator* acquireLineBreakIterator(const UChar*, int length, const AtomicString& locale, LineBreakIteratorMode, bool isCJK); 43 50 void releaseLineBreakIterator(TextBreakIterator*); 51 TextBreakIterator* openLineBreakIterator(const AtomicString& locale, LineBreakIteratorMode, bool isCJK); 52 void closeLineBreakIterator(TextBreakIterator*&); 44 53 TextBreakIterator* sentenceBreakIterator(const UChar*, int length); 45 54 … … 56 65 const int TextBreakDone = -1; 57 66 67 bool isCJKLocale(const AtomicString&); 68 58 69 class LazyLineBreakIterator { 59 70 public: 60 71 LazyLineBreakIterator() 61 : m_iterator(0) 72 : m_mode(LineBreakIteratorModeUAX14) 73 , m_isCJK(false) 74 , m_iterator(0) 62 75 { 63 76 } 64 77 65 LazyLineBreakIterator(String string, const AtomicString& locale = AtomicString() )78 LazyLineBreakIterator(String string, const AtomicString& locale = AtomicString(), LineBreakIteratorMode mode = LineBreakIteratorModeUAX14) 66 79 : m_string(string) 67 80 , m_locale(locale) 81 , m_mode(mode) 68 82 , m_iterator(0) 69 83 { 84 m_isCJK = 
isCJKLocale(locale); 70 85 } 71 86 … … 77 92 78 93 String string() const { return m_string; } 94 bool isLooseCJKMode() const { return m_isCJK && m_mode == LineBreakIteratorModeUAX14Loose; } 79 95 80 96 TextBreakIterator* get() … … 82 98 if (!m_iterator) { 83 99 if (m_string.is8Bit()) 84 m_iterator = acquireLineBreakIterator(m_string.characters8(), m_string.length(), m_locale );100 m_iterator = acquireLineBreakIterator(m_string.characters8(), m_string.length(), m_locale, m_mode, m_isCJK); 85 101 else 86 m_iterator = acquireLineBreakIterator(m_string.characters16(), m_string.length(), m_locale );102 m_iterator = acquireLineBreakIterator(m_string.characters16(), m_string.length(), m_locale, m_mode, m_isCJK); 87 103 } 88 104 return m_iterator; 89 105 } 90 106 91 void reset(String string, const AtomicString& locale )107 void reset(String string, const AtomicString& locale, LineBreakIteratorMode mode) 92 108 { 93 109 if (m_iterator) … … 96 112 m_string = string; 97 113 m_locale = locale; 114 m_mode = mode; 115 m_isCJK = isCJKLocale(locale); 98 116 m_iterator = 0; 99 117 } … … 102 120 String m_string; 103 121 AtomicString m_locale; 122 LineBreakIteratorMode m_mode; 123 bool m_isCJK; 104 124 TextBreakIterator* m_iterator; 105 125 }; -
trunk/Source/WebCore/platform/text/TextBreakIteratorICU.cpp
r133386 r133529 24 24 25 25 #include "LineBreakIteratorPoolICU.h" 26 #include <unicode/ubrk.h> 27 #include <unicode/uloc.h> 26 28 #include <wtf/Atomics.h> 27 29 #include <wtf/text/WTFString.h> … … 261 263 } 262 264 263 TextBreakIterator* acquireLineBreakIterator(const LChar* string, int length, const AtomicString& locale )264 { 265 UBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale);265 TextBreakIterator* acquireLineBreakIterator(const LChar* string, int length, const AtomicString& locale, LineBreakIteratorMode mode, bool isCJK) 266 { 267 TextBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale, mode, isCJK); 266 268 if (!iterator) 267 269 return 0; … … 279 281 } 280 282 283 UBreakIterator* ubrkIter = reinterpret_cast<UBreakIterator*>(iterator); 281 284 UErrorCode setTextStatus = U_ZERO_ERROR; 282 ubrk_setUText( iterator, uTextLatin1, &setTextStatus);285 ubrk_setUText(ubrkIter, uTextLatin1, &setTextStatus); 283 286 if (U_FAILURE(setTextStatus)) { 284 287 LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus); … … 288 291 utext_close(uTextLatin1); 289 292 290 return reinterpret_cast<TextBreakIterator*>(iterator);291 } 292 293 TextBreakIterator* acquireLineBreakIterator(const UChar* string, int length, const AtomicString& locale )294 { 295 UBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale);293 return iterator; 294 } 295 296 TextBreakIterator* acquireLineBreakIterator(const UChar* string, int length, const AtomicString& locale, LineBreakIteratorMode mode, bool isCJK) 297 { 298 TextBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale, mode, isCJK); 296 299 if (!iterator) 297 300 return 0; 298 301 302 UBreakIterator* ubrkIter = reinterpret_cast<UBreakIterator*>(iterator); 299 303 UErrorCode setTextStatus = U_ZERO_ERROR; 300 ubrk_setText( iterator, string, length, &setTextStatus);304 ubrk_setText(ubrkIter, string, length, &setTextStatus); 301 305 if 
(U_FAILURE(setTextStatus)) { 302 306 LOG_ERROR("ubrk_setText failed with status %d", setTextStatus); … … 304 308 } 305 309 306 return reinterpret_cast<TextBreakIterator*>(iterator);310 return iterator; 307 311 } 308 312 … … 311 315 ASSERT_ARG(iterator, iterator); 312 316 313 LineBreakIteratorPool::sharedPool().put(reinterpret_cast<UBreakIterator*>(iterator)); 317 LineBreakIteratorPool::sharedPool().put(iterator); 318 } 319 320 // Recognize BCP47 compliant primary language values of 'zh', 'ja', 'ko' 321 // (in any combination of case), optionally followed by subtags. Don't 322 // recognize 3-letter variants 'chi'/'zho', 'jpn', or 'kor' since BCP47 323 // requires use of shortest language tag. 324 template<typename T> 325 static bool isCJKLocale(const T* s, size_t length) 326 { 327 if (!s || length < 2) 328 return false; 329 T c1 = s[0]; 330 T c2 = s[1]; 331 T c3 = length == 2 ? 0 : s[2]; 332 if (!c3 || c3 == '-' || c3 == '_' || c3 == '@') { 333 if (c1 == 'z' || c1 == 'Z') 334 return c2 == 'h' || c2 == 'H'; 335 if (c1 == 'j' || c1 == 'J') 336 return c2 == 'a' || c2 == 'A'; 337 if (c1 == 'k' || c1 == 'K') 338 return c2 == 'o' || c2 == 'O'; 339 } 340 return false; 341 } 342 343 bool isCJKLocale(const AtomicString& locale) 344 { 345 if (locale.isEmpty()) 346 return false; 347 size_t length = locale.length(); 348 if (locale.is8Bit()) 349 return isCJKLocale<LChar>(locale.characters8(), length); 350 return isCJKLocale<UChar>(locale.characters16(), length); 351 } 352 353 static void mapLineIteratorModeToRules(LineBreakIteratorMode, bool isCJK, String& rules); 354 355 TextBreakIterator* openLineBreakIterator(const AtomicString& locale, LineBreakIteratorMode mode, bool isCJK) 356 { 357 UBreakIterator* ubrkIter; 358 UErrorCode openStatus = U_ZERO_ERROR; 359 bool isLocaleEmpty = locale.isEmpty(); 360 if ((mode == LineBreakIteratorModeUAX14) && !isCJK) 361 ubrkIter = ubrk_open(UBRK_LINE, isLocaleEmpty ? 
currentTextBreakLocaleID() : locale.string().utf8().data(), 0, 0, &openStatus); 362 else { 363 UParseError parseStatus; 364 String rules; 365 mapLineIteratorModeToRules(mode, isCJK, rules); 366 ubrkIter = ubrk_openRules(rules.characters(), rules.length(), 0, 0, &parseStatus, &openStatus); 367 } 368 // Locale comes from a web page and it can be invalid, leading ICU 369 // to fail, in which case we fall back to the default locale (with default rules). 370 if (!isLocaleEmpty && U_FAILURE(openStatus)) { 371 openStatus = U_ZERO_ERROR; 372 ubrkIter = ubrk_open(UBRK_LINE, currentTextBreakLocaleID(), 0, 0, &openStatus); 373 } 374 375 if (U_FAILURE(openStatus)) { 376 LOG_ERROR("ubrk_open failed with status %d", openStatus); 377 ASSERT(!ubrkIter); 378 } 379 return reinterpret_cast<TextBreakIterator*>(ubrkIter); 380 } 381 382 void closeLineBreakIterator(TextBreakIterator*& iterator) 383 { 384 UBreakIterator* ubrkIter = reinterpret_cast<UBreakIterator*>(iterator); 385 ASSERT(ubrkIter); 386 ubrk_close(ubrkIter); 387 iterator = 0; 314 388 } 315 389 … … 511 585 } 512 586 513 } 587 static const char* uax14Prologue = 588 "!!chain;" 589 "!!LBCMNoChain;" 590 "!!lookAheadHardBreak;"; 591 592 static const char* uax14AssignmentsBefore = 593 // explicitly enumerate $CJ since ICU versions prior to 49 don't support :LineBreak=Conditional_Japanese_Starter: 594 "$CJ = [" 595 #if (U_ICU_VERSION_MAJOR_NUM >= 4) && (U_ICU_VERSION_MINOR_NUM >= 9) 596 ":LineBreak=Conditional_Japanese_Starter:" 597 #else 598 "\\u3041\\u3043\\u3045\\u3047\\u3049\\u3063\\u3083\\u3085\\u3087\\u308E\\u3095\\u3096\\u30A1\\u30A3\\u30A5\\u30A7" 599 "\\u30A9\\u30C3\\u30E3\\u30E5\\u30E7\\u30EE\\u30F5\\u30F6\\u30FC" 600 "\\u31F0\\u31F1\\u31F2\\u31F3\\u31F4\\u31F5\\u31F6\\u31F7\\u31F8\\u31F9\\u31FA\\u31FB\\u31FC\\u31FD\\u31FE\\u31FF" 601 "\\uFF67\\uFF68\\uFF69\\uFF6A\\uFF6B\\uFF6C\\uFF6D\\uFF6E\\uFF6F\\uFF70" 602 #endif 603 "];"; 604 605 static const char* uax14AssignmentsCustomLooseCJK = 606 "$BA_SUB = [\\u2010\\u2013];" 
607 "$EX_SUB = [\\u0021\\u003F\\uFF01\\uFF1F];" 608 "$ID_SUB = '';" 609 "$IN_SUB = [\\u2025\\u2026];" 610 "$IS_SUB = [\\u003A\\u003B];" 611 "$NS_SUB = [\\u203C\\u2047\\u2048\\u2049\\u3005\\u301C\\u303B\\u309D\\u309E\\u30A0\\u30FB\\u30FD\\u30FE\\uFF1A\\uFF1B\\uFF65];" 612 "$PO_SUB = [\\u0025\\u00A2\\u00B0\\u2030\\u2032\\u2033\\u2103\\uFF05\\uFFE0];" 613 "$PR_SUB = [\\u0024\\u00A3\\u00A5\\u20AC\\u2116\\uFF04\\uFFE1\\uFFE5];" 614 "$ID_ADD = [$CJ $BA_SUB $EX_SUB $IN_SUB $IS_SUB $NS_SUB $PO_SUB $PR_SUB];" 615 "$NS_ADD = '';"; 616 617 static const char* uax14AssignmentsCustomLooseNonCJK = 618 "$BA_SUB = '';" 619 "$EX_SUB = '';" 620 "$ID_SUB = '';" 621 "$IN_SUB = [\\u2025\\u2026];" 622 "$IS_SUB = '';" 623 "$NS_SUB = [\\u3005\\u303B\\u309D\\u309E\\u30FD\\u30FE];" 624 "$PO_SUB = '';" 625 "$PR_SUB = '';" 626 "$ID_ADD = [$CJ $IN_SUB $NS_SUB];" 627 "$NS_ADD = '';"; 628 629 static const char* uax14AssignmentsCustomNormalCJK = 630 "$BA_SUB = [\\u2010\\u2013];" 631 "$EX_SUB = '';" 632 "$IN_SUB = '';" 633 "$ID_SUB = '';" 634 "$IS_SUB = '';" 635 "$NS_SUB = [\\u301C\\u30A0];" 636 "$PO_SUB = '';" 637 "$PR_SUB = '';" 638 "$ID_ADD = [$CJ $BA_SUB $NS_SUB];" 639 "$NS_ADD = '';"; 640 641 static const char* uax14AssignmentsCustomNormalNonCJK = 642 "$BA_SUB = '';" 643 "$EX_SUB = '';" 644 "$ID_SUB = '';" 645 "$IN_SUB = '';" 646 "$IS_SUB = '';" 647 "$NS_SUB = '';" 648 "$PO_SUB = '';" 649 "$PR_SUB = '';" 650 "$ID_ADD = [$CJ];" 651 "$NS_ADD = '';"; 652 653 static const char* uax14AssignmentsCustomStrictCJK = 654 "$BA_SUB = '';" 655 "$EX_SUB = '';" 656 "$ID_SUB = '';" 657 "$IN_SUB = '';" 658 "$IS_SUB = '';" 659 "$NS_SUB = '';" 660 "$PO_SUB = '';" 661 "$PR_SUB = '';" 662 "$ID_ADD = '';" 663 "$NS_ADD = [$CJ];"; 664 665 #define uax14AssignmentsCustomStrictNonCJK uax14AssignmentsCustomStrictCJK 666 #define uax14AssignmentsCustomDefaultCJK uax14AssignmentsCustomNormalCJK 667 #define uax14AssignmentsCustomDefaultNonCJK uax14AssignmentsCustomStrictNonCJK 668 669 static const char* uax14AssignmentsAfter 
= 670 "$AI = [:LineBreak = Ambiguous:];" 671 "$AL = [:LineBreak = Alphabetic:];" 672 "$BA = [[:LineBreak = Break_After:] - $BA_SUB];" 673 "$BB = [:LineBreak = Break_Before:];" 674 "$BK = [:LineBreak = Mandatory_Break:];" 675 "$B2 = [:LineBreak = Break_Both:];" 676 "$CB = [:LineBreak = Contingent_Break:];" 677 "$CL = [:LineBreak = Close_Punctuation:];" 678 "$CM = [:LineBreak = Combining_Mark:];" 679 "$CP = [:LineBreak = Close_Parenthesis:];" 680 "$CR = [:LineBreak = Carriage_Return:];" 681 "$EX = [[:LineBreak = Exclamation:] - $EX_SUB];" 682 "$GL = [:LineBreak = Glue:];" 683 #if (U_ICU_VERSION_MAJOR_NUM >= 4) && (U_ICU_VERSION_MINOR_NUM >= 9) 684 "$HL = [:LineBreak = Hebrew_Letter:];" 685 #else 686 "$HL = [[:Hebrew:] & [:Letter:]];" 687 #endif 688 "$HY = [:LineBreak = Hyphen:];" 689 "$H2 = [:LineBreak = H2:];" 690 "$H3 = [:LineBreak = H3:];" 691 "$ID = [[[[:LineBreak = Ideographic:] - $CJ] $ID_ADD] - $ID_SUB];" 692 "$IN = [[:LineBreak = Inseparable:] - $IN_SUB];" 693 "$IS = [[:LineBreak = Infix_Numeric:] - $IS_SUB];" 694 "$JL = [:LineBreak = JL:];" 695 "$JV = [:LineBreak = JV:];" 696 "$JT = [:LineBreak = JT:];" 697 "$LF = [:LineBreak = Line_Feed:];" 698 "$NL = [:LineBreak = Next_Line:];" 699 "$NS = [[[[:LineBreak = Nonstarter:] - $CJ] $NS_ADD] - $NS_SUB];" 700 "$NU = [:LineBreak = Numeric:];" 701 "$OP = [:LineBreak = Open_Punctuation:];" 702 "$PO = [[:LineBreak = Postfix_Numeric:] - $PO_SUB];" 703 "$PR = [[:LineBreak = Prefix_Numeric:] - $PR_SUB];" 704 "$QU = [:LineBreak = Quotation:];" 705 "$SA = [:LineBreak = Complex_Context:];" 706 "$SG = [:LineBreak = Surrogate:];" 707 "$SP = [:LineBreak = Space:];" 708 "$SY = [:LineBreak = Break_Symbols:];" 709 "$WJ = [:LineBreak = Word_Joiner:];" 710 "$XX = [:LineBreak = Unknown:];" 711 "$ZW = [:LineBreak = ZWSpace:];" 712 "$dictionary = [:LineBreak = Complex_Context:];" 713 "$ALPlus = [$AL $AI $SA $SG $XX];" 714 "$ALcm = $ALPlus $CM*;" 715 "$BAcm = $BA $CM*;" 716 "$BBcm = $BB $CM*;" 717 "$B2cm = $B2 $CM*;" 718 "$CLcm = $CL 
$CM*;" 719 "$CPcm = $CP $CM*;" 720 "$EXcm = $EX $CM*;" 721 "$GLcm = $GL $CM*;" 722 "$HLcm = $HL $CM*;" 723 "$HYcm = $HY $CM*;" 724 "$H2cm = $H2 $CM*;" 725 "$H3cm = $H3 $CM*;" 726 "$IDcm = $ID $CM*;" 727 "$INcm = $IN $CM*;" 728 "$IScm = $IS $CM*;" 729 "$JLcm = $JL $CM*;" 730 "$JVcm = $JV $CM*;" 731 "$JTcm = $JT $CM*;" 732 "$NScm = $NS $CM*;" 733 "$NUcm = $NU $CM*;" 734 "$OPcm = $OP $CM*;" 735 "$POcm = $PO $CM*;" 736 "$PRcm = $PR $CM*;" 737 "$QUcm = $QU $CM*;" 738 "$SYcm = $SY $CM*;" 739 "$WJcm = $WJ $CM*;"; 740 741 static const char* uax14Forward = 742 "!!forward;" 743 "$CAN_CM = [^$SP $BK $CR $LF $NL $ZW $CM];" 744 "$CANT_CM = [$SP $BK $CR $LF $NL $ZW $CM];" 745 "$AL_FOLLOW_NOCM = [$BK $CR $LF $NL $ZW $SP];" 746 "$AL_FOLLOW_CM = [$CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HY $NS $IN $NU $ALPlus];" 747 "$AL_FOLLOW = [$AL_FOLLOW_NOCM $AL_FOLLOW_CM];" 748 "$LB4Breaks = [$BK $CR $LF $NL];" 749 "$LB4NonBreaks = [^$BK $CR $LF $NL];" 750 "$LB8Breaks = [$LB4Breaks $ZW];" 751 "$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];" 752 "$LB18NonBreaks = [$LB8NonBreaks - [$SP]];" 753 "$LB18Breaks = [$LB8Breaks $SP];" 754 "$LB20NonBreaks = [$LB18NonBreaks - $CB];" 755 "$ALPlus $CM+;" 756 "$BA $CM+;" 757 "$BB $CM+;" 758 "$B2 $CM+;" 759 "$CL $CM+;" 760 "$CP $CM+;" 761 "$EX $CM+;" 762 "$GL $CM+;" 763 "$HL $CM+;" 764 "$HY $CM+;" 765 "$H2 $CM+;" 766 "$H3 $CM+;" 767 "$ID $CM+;" 768 "$IN $CM+;" 769 "$IS $CM+;" 770 "$JL $CM+;" 771 "$JV $CM+;" 772 "$JT $CM+;" 773 "$NS $CM+;" 774 "$NU $CM+;" 775 "$OP $CM+;" 776 "$PO $CM+;" 777 "$PR $CM+;" 778 "$QU $CM+;" 779 "$SY $CM+;" 780 "$WJ $CM+;" 781 "$CR $LF {100};" 782 "$LB4NonBreaks? 
$LB4Breaks {100};" 783 "$CAN_CM $CM* $LB4Breaks {100};" 784 "$CM+ $LB4Breaks {100};" 785 "$LB4NonBreaks [$SP $ZW];" 786 "$CAN_CM $CM* [$SP $ZW];" 787 "$CM+ [$SP $ZW];" 788 "$CAN_CM $CM+;" 789 "$CM+;" 790 "$CAN_CM $CM* $WJcm;" 791 "$LB8NonBreaks $WJcm;" 792 "$CM+ $WJcm;" 793 "$WJcm $CANT_CM;" 794 "$WJcm $CAN_CM $CM*;" 795 "$GLcm $CAN_CM $CM*;" 796 "$GLcm $CANT_CM;" 797 "[[$LB8NonBreaks] - [$SP $BA $HY]] $CM* $GLcm;" 798 "$CM+ GLcm;" 799 "$LB8NonBreaks $CL;" 800 "$CAN_CM $CM* $CL;" 801 "$CM+ $CL;" 802 "$LB8NonBreaks $CP;" 803 "$CAN_CM $CM* $CP;" 804 "$CM+ $CP;" 805 "$LB8NonBreaks $EX;" 806 "$CAN_CM $CM* $EX;" 807 "$CM+ $EX;" 808 "$LB8NonBreaks $IS;" 809 "$CAN_CM $CM* $IS;" 810 "$CM+ $IS;" 811 "$LB8NonBreaks $SY;" 812 "$CAN_CM $CM* $SY;" 813 "$CM+ $SY;" 814 "$OPcm $SP* $CAN_CM $CM*;" 815 "$OPcm $SP* $CANT_CM;" 816 "$OPcm $SP+ $CM+ $AL_FOLLOW?;" 817 "$QUcm $SP* $OPcm;" 818 "($CLcm | $CPcm) $SP* $NScm;" 819 "$B2cm $SP* $B2cm;" 820 "$LB18NonBreaks $CM* $QUcm;" 821 "$CM+ $QUcm;" 822 "$QUcm .?;" 823 "$QUcm $LB18NonBreaks $CM*;" 824 "$LB20NonBreaks $CM* ($BAcm | $HYcm | $NScm); " 825 "$BBcm [^$CB];" 826 "$BBcm $LB20NonBreaks $CM*;" 827 "$HLcm ($HYcm | $BAcm) [^$CB]?;" 828 "($ALcm | $HLcm) $INcm;" 829 "$CM+ $INcm;" 830 "$IDcm $INcm;" 831 "$INcm $INcm;" 832 "$NUcm $INcm;" 833 "$IDcm $POcm;" 834 "$ALcm $NUcm;" 835 "$HLcm $NUcm;" 836 "$CM+ $NUcm;" 837 "$NUcm $ALcm;" 838 "$NUcm $HLcm;" 839 "$PRcm $IDcm;" 840 "$PRcm ($ALcm | $HLcm);" 841 "$POcm ($ALcm | $HLcm);" 842 "($PRcm | $POcm)? ($OPcm | $HYcm)? $NUcm ($NUcm | $SYcm | $IScm)* ($CLcm | $CPcm)? 
($PRcm | $POcm)?;" 843 "$JLcm ($JLcm | $JVcm | $H2cm | $H3cm);" 844 "($JVcm | $H2cm) ($JVcm | $JTcm);" 845 "($JTcm | $H3cm) $JTcm;" 846 "($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $INcm;" 847 "($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $POcm;" 848 "$PRcm ($JLcm | $JVcm | $JTcm | $H2cm | $H3cm);" 849 "($ALcm | $HLcm) ($ALcm | $HLcm);" 850 "$CM+ ($ALcm | $HLcm);" 851 "$IScm ($ALcm | $HLcm);" 852 "($ALcm | $HLcm | $NUcm) $OPcm;" 853 "$CM+ $OPcm;" 854 "$CPcm ($ALcm | $HLcm | $NUcm);"; 855 856 static const char* uax14Reverse = 857 "!!reverse;" 858 "$CM+ $ALPlus;" 859 "$CM+ $BA;" 860 "$CM+ $BB;" 861 "$CM+ $B2;" 862 "$CM+ $CL;" 863 "$CM+ $CP;" 864 "$CM+ $EX;" 865 "$CM+ $GL;" 866 "$CM+ $HL;" 867 "$CM+ $HY;" 868 "$CM+ $H2;" 869 "$CM+ $H3;" 870 "$CM+ $ID;" 871 "$CM+ $IN;" 872 "$CM+ $IS;" 873 "$CM+ $JL;" 874 "$CM+ $JV;" 875 "$CM+ $JT;" 876 "$CM+ $NS;" 877 "$CM+ $NU;" 878 "$CM+ $OP;" 879 "$CM+ $PO;" 880 "$CM+ $PR;" 881 "$CM+ $QU;" 882 "$CM+ $SY;" 883 "$CM+ $WJ;" 884 "$CM+;" 885 "$AL_FOLLOW $CM+ / ([$BK $CR $LF $NL $ZW {eof}] | $SP+ $CM+ $SP | $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));" 886 "[$PR] / $CM+ [$BK $CR $LF $NL $ZW $SP {eof}];" 887 "$LB4Breaks [$LB4NonBreaks-$CM];" 888 "$LB4Breaks $CM+ $CAN_CM;" 889 "$LF $CR;" 890 "[$SP $ZW] [$LB4NonBreaks-$CM];" 891 "[$SP $ZW] $CM+ $CAN_CM;" 892 "$CM+ $CAN_CM;" 893 "$CM* $WJ $CM* $CAN_CM;" 894 "$CM* $WJ [$LB8NonBreaks-$CM];" 895 "$CANT_CM $CM* $WJ;" 896 "$CM* $CAN_CM $CM* $WJ;" 897 "$CM* $GL $CM* [$LB8NonBreaks-[$CM $SP $BA $HY]];" 898 "$CANT_CM $CM* $GL;" 899 "$CM* $CAN_CM $CM* $GL;" 900 "$CL $CM+ $CAN_CM;" 901 "$CP $CM+ $CAN_CM;" 902 "$EX $CM+ $CAN_CM;" 903 "$IS $CM+ $CAN_CM;" 904 "$SY $CM+ $CAN_CM;" 905 "$CL [$LB8NonBreaks-$CM];" 906 "$CP [$LB8NonBreaks-$CM];" 907 "$EX [$LB8NonBreaks-$CM];" 908 "$IS [$LB8NonBreaks-$CM];" 909 "$SY [$LB8NonBreaks-$CM];" 910 "[$CL $CP $EX $IS $SY] $CM+ $SP+ $CM* $OP; " 911 "$CM* $CAN_CM $SP* $CM* $OP;" 912 "$CANT_CM $SP* $CM* $OP;" 913 "$AL_FOLLOW? 
$CM+ $SP $SP* $CM* $OP;" 914 "$AL_FOLLOW_NOCM $CM+ $SP+ $CM* $OP;" 915 "$CM* $AL_FOLLOW_CM $CM+ $SP+ $CM* $OP;" 916 "$SY $CM $SP+ $OP;" 917 "$CM* $OP $SP* $CM* $QU;" 918 "$CM* $NS $SP* $CM* ($CL | $CP);" 919 "$CM* $B2 $SP* $CM* $B2;" 920 "$CM* $QU $CM* $CAN_CM;" 921 "$CM* $QU $LB18NonBreaks;" 922 "$CM* $CAN_CM $CM* $QU;" 923 "$CANT_CM $CM* $QU;" 924 "$CM* ($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM];" 925 "$CM* [$LB20NonBreaks-$CM] $CM* $BB;" 926 "[^$CB] $CM* $BB;" 927 "[^$CB] $CM* ($HY | $BA) $CM* $HL;" 928 "$CM* $IN $CM* ($ALPlus | $HL);" 929 "$CM* $IN $CM* $ID;" 930 "$CM* $IN $CM* $IN;" 931 "$CM* $IN $CM* $NU;" 932 "$CM* $PO $CM* $ID;" 933 "$CM* $NU $CM* ($ALPlus | $HL);" 934 "$CM* ($ALPlus | $HL) $CM* $NU;" 935 "$CM* $ID $CM* $PR;" 936 "$CM* ($ALPlus | $HL) $CM* $PR;" 937 "$CM* ($ALPlus | $HL) $CM* $PO;" 938 "($CM* ($PR | $PO))? ($CM* ($CL | $CP))? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* ($PR | $PO))?;" 939 "$CM* ($H3 | $H2 | $JV | $JL) $CM* $JL;" 940 "$CM* ($JT | $JV) $CM* ($H2 | $JV);" 941 "$CM* $JT $CM* ($H3 | $JT);" 942 "$CM* $IN $CM* ($H3 | $H2 | $JT | $JV | $JL);" 943 "$CM* $PO $CM* ($H3 | $H2 | $JT | $JV | $JL);" 944 "$CM* ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;" 945 "$CM* ($ALPlus | $HL) $CM* ($ALPlus | $HL);" 946 "$CM* ($ALPlus | $HL) $CM* $IS;" 947 "$CM* $OP $CM* ($ALPlus | $HL | $NU);" 948 "$CM* ($ALPlus | $HL | $NU) $CM* $CP;"; 949 950 static const char* uax14SafeForward = 951 "!!safe_forward;" 952 "[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $dictionary];" 953 "$dictionary $dictionary;"; 954 955 static const char* uax14SafeReverse = 956 "!!safe_reverse;" 957 "$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];" 958 "$CM+ $SP / .;" 959 "$SP+ $CM* $OP;" 960 "$SP+ $CM* $QU;" 961 "$SP+ $CM* ($CL | $CP);" 962 "$SP+ $CM* $B2;" 963 "$CM* ($HY | $BA) $CM* $HL;" 964 "($CM* ($IS | $SY))+ $CM* $NU;" 965 "($CL | $CP) $CM* ($NU | $IS | $SY);" 966 "$dictionary $dictionary;"; 967 968 static void 
mapLineIteratorModeToRules(LineBreakIteratorMode mode, bool isCJK, String& rules) 969 { 970 StringBuilder rulesBuilder; 971 rulesBuilder.append(uax14Prologue); 972 rulesBuilder.append(uax14AssignmentsBefore); 973 switch (mode) { 974 case LineBreakIteratorModeUAX14: 975 rulesBuilder.append(isCJK ? uax14AssignmentsCustomDefaultCJK : uax14AssignmentsCustomDefaultNonCJK); 976 break; 977 case LineBreakIteratorModeUAX14Loose: 978 rulesBuilder.append(isCJK ? uax14AssignmentsCustomLooseCJK : uax14AssignmentsCustomLooseNonCJK); 979 break; 980 case LineBreakIteratorModeUAX14Normal: 981 rulesBuilder.append(isCJK ? uax14AssignmentsCustomNormalCJK : uax14AssignmentsCustomNormalNonCJK); 982 break; 983 case LineBreakIteratorModeUAX14Strict: 984 rulesBuilder.append(isCJK ? uax14AssignmentsCustomStrictCJK : uax14AssignmentsCustomStrictNonCJK); 985 break; 986 } 987 rulesBuilder.append(uax14AssignmentsAfter); 988 rulesBuilder.append(uax14Forward); 989 rulesBuilder.append(uax14Reverse); 990 rulesBuilder.append(uax14SafeForward); 991 rulesBuilder.append(uax14SafeReverse); 992 rules = rulesBuilder.toString(); 993 } 994 995 } -
trunk/Source/WebCore/rendering/RenderBlockLineLayout.cpp
r133386 r133529 2550 2550 bool breakAll = currentStyle->wordBreak() == BreakAllWordBreak && autoWrap; 2551 2551 float hyphenWidth = 0; 2552 bool isLooseCJKMode = false; 2552 2553 2553 2554 if (t->isWordBreak()) { … … 2562 2563 renderTextInfo.m_font = &f; 2563 2564 renderTextInfo.m_layout = f.createLayout(t, width.currentWidth(), collapseWhiteSpace); 2564 renderTextInfo.m_lineBreakIterator.reset(t->text(), style->locale()); 2565 renderTextInfo.m_lineBreakIterator.reset(t->text(), style->locale(), mapLineBreakToIteratorMode(blockStyle->lineBreak())); 2566 isLooseCJKMode = renderTextInfo.m_lineBreakIterator.isLooseCJKMode(); 2565 2567 } else if (renderTextInfo.m_layout && renderTextInfo.m_font != &f) { 2566 2568 renderTextInfo.m_font = &f; … … 2599 2601 } 2600 2602 2601 bool betweenWords = c == '\n' || (currWS != PRE && !atStart && isBreakable(renderTextInfo.m_lineBreakIterator, current.m_pos, current.m_nextBreakablePosition, breakNBSP) 2603 bool betweenWords = c == '\n' || (currWS != PRE && !atStart && isBreakable(renderTextInfo.m_lineBreakIterator, current.m_pos, current.m_nextBreakablePosition, breakNBSP, isLooseCJKMode) 2602 2604 && (style->hyphens() != HyphensNone || (current.previousInSameNode() != softHyphen))); 2603 2605 -
trunk/Source/WebCore/rendering/RenderText.cpp
r133386 r133529 936 936 } 937 937 938 LineBreakIteratorMode mapLineBreakToIteratorMode(LineBreak lineBreak) 939 { 940 switch (lineBreak) { 941 case LineBreakAuto: 942 case LineBreakAfterWhiteSpace: 943 return LineBreakIteratorModeUAX14; 944 case LineBreakLoose: 945 return LineBreakIteratorModeUAX14Loose; 946 case LineBreakNormal: 947 return LineBreakIteratorModeUAX14Normal; 948 case LineBreakStrict: 949 return LineBreakIteratorModeUAX14Strict; 950 } 951 return LineBreakIteratorModeUAX14; 952 } 953 938 954 void RenderText::computePreferredLogicalWidths(float leadWidth, HashSet<const SimpleFontData*>& fallbackFonts, GlyphOverflow& glyphOverflow) 939 955 { … … 960 976 float wordSpacing = styleToUse->wordSpacing(); 961 977 int len = textLength(); 962 LazyLineBreakIterator breakIterator(m_text, styleToUse->locale()); 978 LazyLineBreakIterator breakIterator(m_text, styleToUse->locale(), mapLineBreakToIteratorMode(styleToUse->lineBreak())); 963 979 bool needsWordSpacing = false; 964 980 bool ignoringSpaces = false; … … 995 1011 bool breakNBSP = styleToUse->autoWrap() && styleToUse->nbspMode() == SPACE; 996 1012 bool breakAll = (styleToUse->wordBreak() == BreakAllWordBreak || styleToUse->wordBreak() == BreakWordBreak) && styleToUse->autoWrap(); 1013 bool isLooseCJKMode = breakIterator.isLooseCJKMode(); 997 1014 998 1015 for (int i = 0; i < len; i++) { … … 1042 1059 } 1043 1060 1044 bool hasBreak = breakAll || isBreakable(breakIterator, i, nextBreakable, breakNBSP); 1061 bool hasBreak = breakAll || isBreakable(breakIterator, i, nextBreakable, breakNBSP, isLooseCJKMode); 1045 1062 bool betweenWords = true; 1046 1063 int j = i; … … 1050 1067 break; 1051 1068 c = characterAt(j); 1052 if (isBreakable(breakIterator, j, nextBreakable, breakNBSP) && characterAt(j - 1) != softHyphen) 1069 if (isBreakable(breakIterator, j, nextBreakable, breakNBSP, isLooseCJKMode) && characterAt(j - 1) != softHyphen) 1053 1070 break; 1054 1071 if (breakAll) { -
trunk/Source/WebCore/rendering/RenderText.h
r133386 r133529 234 234 void applyTextTransform(const RenderStyle*, String&, UChar); 235 235 236 LineBreakIteratorMode mapLineBreakToIteratorMode(LineBreak); 237 236 238 } // namespace WebCore 237 239 -
trunk/Source/WebCore/rendering/break_lines.cpp
r133386 r133529 39 39 namespace WebCore { 40 40 41 template<bool treatNoBreakSpaceAsBreak> 41 // Parameterization for non-breaking space (U+00A0) behavior. 42 enum NBSPBehavior { 43 IgnoreNBSP, 44 TreatNBSPAsBreak, 45 }; 46 47 // Parameterization for loose mode behavior. In loose mode, we can't use the ASCII 48 // table below since loose mode allows "$100" to break after '$' in content marked as CJK. 49 enum LooseBehavior { 50 NonLooseMode, 51 LooseMode, 52 }; 53 54 template<NBSPBehavior nbspBehavior> 42 55 static inline bool isBreakableSpace(UChar ch) 43 56 { … … 48 61 return true; 49 62 case noBreakSpace: 50 return treatNoBreakSpaceAsBreak; 63 return (nbspBehavior == TreatNBSPAsBreak); 51 64 default: 52 65 return false; … … 140 153 } 141 154 142 template<bool treatNoBreakSpaceAsBreak> 155 template<NBSPBehavior nbspBehavior> 143 156 inline bool needsLineBreakIterator(UChar ch) 144 157 { 145 if (treatNoBreakSpaceAsBreak) 158 if (nbspBehavior == TreatNBSPAsBreak) 146 159 return ch > asciiLineBreakTableLastChar; 147 160 return ch > asciiLineBreakTableLastChar && ch != noBreakSpace; 148 161 } 149 162 150 template<typename CharacterType, bool treatNoBreakSpaceAsBreak> 163 template<typename CharacterType, NBSPBehavior nbspBehavior, LooseBehavior looseBehavior> 151 164 static inline int nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* str, unsigned length, int pos) 152 165 { … … 159 172 CharacterType ch = str[i]; 160 173 161 if (isBreakableSpace<treatNoBreakSpaceAsBreak>(ch) || shouldBreakAfter(lastLastCh, lastCh, ch)) 174 // Don't use ASCII shortcut (shouldBreakAfter) if loose mode. 175 if (isBreakableSpace<nbspBehavior>(ch) || ((looseBehavior != LooseMode) && shouldBreakAfter(lastLastCh, lastCh, ch))) 162 176 return i; 163 177 164 if (needsLineBreakIterator<treatNoBreakSpaceAsBreak>(ch) || needsLineBreakIterator<treatNoBreakSpaceAsBreak>(lastCh)) { 178 // Always use line break iterator if loose mode.
179 if ((looseBehavior == LooseMode) || needsLineBreakIterator<nbspBehavior>(ch) || needsLineBreakIterator<nbspBehavior>(lastCh)) { 165 180 if (nextBreak < i && i) { 166 181 TextBreakIterator* breakIterator = lazyBreakIterator.get(); … … 168 183 nextBreak = textBreakFollowing(breakIterator, i - 1); 169 184 } 170 if (i == nextBreak && !isBreakableSpace<treatNoBreakSpaceAsBreak>(lastCh)) 185 if (i == nextBreak && !isBreakableSpace<nbspBehavior>(lastCh)) 171 186 return i; 172 187 } … … 183 198 String string = lazyBreakIterator.string(); 184 199 if (string.is8Bit()) 185 return nextBreakablePosition<LChar, false>(lazyBreakIterator, string.characters8(), string.length(), pos); 186 return nextBreakablePosition<UChar, false>(lazyBreakIterator, string.characters16(), string.length(), pos); 200 return nextBreakablePosition<LChar, IgnoreNBSP, NonLooseMode>(lazyBreakIterator, string.characters8(), string.length(), pos); 201 return nextBreakablePosition<UChar, IgnoreNBSP, NonLooseMode>(lazyBreakIterator, string.characters16(), string.length(), pos); 202 } 203 204 int nextBreakablePositionIgnoringNBSPLoose(LazyLineBreakIterator& lazyBreakIterator, int pos) 205 { 206 String string = lazyBreakIterator.string(); 207 if (string.is8Bit()) 208 return nextBreakablePosition<LChar, IgnoreNBSP, LooseMode>(lazyBreakIterator, string.characters8(), string.length(), pos); 209 return nextBreakablePosition<UChar, IgnoreNBSP, LooseMode>(lazyBreakIterator, string.characters16(), string.length(), pos); 187 210 } 188 211 … … 191 214 String string = lazyBreakIterator.string(); 192 215 if (string.is8Bit()) 193 return nextBreakablePosition<LChar, true>(lazyBreakIterator, string.characters8(), string.length(), pos); 194 return nextBreakablePosition<UChar, true>(lazyBreakIterator, string.characters16(), string.length(), pos); 216 return nextBreakablePosition<LChar, TreatNBSPAsBreak, NonLooseMode>(lazyBreakIterator, string.characters8(), string.length(), pos); 217 return nextBreakablePosition<UChar, 
TreatNBSPAsBreak, NonLooseMode>(lazyBreakIterator, string.characters16(), string.length(), pos); 218 } 219 220 int nextBreakablePositionLoose(LazyLineBreakIterator& lazyBreakIterator, int pos) 221 { 222 String string = lazyBreakIterator.string(); 223 if (string.is8Bit()) 224 return nextBreakablePosition<LChar, TreatNBSPAsBreak, LooseMode>(lazyBreakIterator, string.characters8(), string.length(), pos); 225 return nextBreakablePosition<UChar, TreatNBSPAsBreak, LooseMode>(lazyBreakIterator, string.characters16(), string.length(), pos); 195 226 } 196 227 -
trunk/Source/WebCore/rendering/break_lines.h
r133386 r133529 29 29 30 30 int nextBreakablePositionIgnoringNBSP(LazyLineBreakIterator&, int pos); 31 int nextBreakablePositionIgnoringNBSPLoose(LazyLineBreakIterator&, int pos); 31 32 int nextBreakablePosition(LazyLineBreakIterator&, int pos); 33 int nextBreakablePositionLoose(LazyLineBreakIterator&, int pos); 32 34 33 inline bool isBreakable(LazyLineBreakIterator& lazyBreakIterator, int pos, int& nextBreakable, bool breakNBSP) 35 inline bool isBreakable(LazyLineBreakIterator& lazyBreakIterator, int pos, int& nextBreakable, bool breakNBSP, bool isLooseMode) 34 36 { 35 37 if (pos > nextBreakable) { 36 if (breakNBSP) 37 nextBreakable = nextBreakablePosition(lazyBreakIterator, pos); 38 else 39 nextBreakable = nextBreakablePositionIgnoringNBSP(lazyBreakIterator, pos); 38 if (isLooseMode) { 39 if (breakNBSP) 40 nextBreakable = nextBreakablePositionLoose(lazyBreakIterator, pos); 41 else 42 nextBreakable = nextBreakablePositionIgnoringNBSPLoose(lazyBreakIterator, pos); 43 } else { 44 if (breakNBSP) 45 nextBreakable = nextBreakablePosition(lazyBreakIterator, pos); 46 else 47 nextBreakable = nextBreakablePositionIgnoringNBSP(lazyBreakIterator, pos); 48 } 40 49 } 41 50 return pos == nextBreakable;
Note: See TracChangeset
for help on using the changeset viewer.