Changeset 61915 in webkit
- Timestamp:
- Jun 25, 2010 4:36:37 PM (14 years ago)
- Location:
- trunk/WebCore
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/WebCore/ChangeLog
r61914 r61915 1 2010-06-25 Adam Barth <abarth@webkit.org> 2 3 Reviewed by Eric Seidel. 4 5 HTMLTreeBuilder should branch first on token type and then on insertion mode 6 https://bugs.webkit.org/show_bug.cgi?id=41232 7 8 This is different than how the spec is written, but it lets us remove a 9 lot of redundancy in the algorithm. We might even want to pull some of 10 the branches on token name outside the insertion mode branch, but I'll 11 leave that for a future patch. 12 13 Although this looks like a big patch, it's mostly just a mechanical 14 switch permutation. 15 16 * html/HTMLTreeBuilder.cpp: 17 (WebCore::HTMLTreeBuilder::constructTreeFromToken): 18 (WebCore::HTMLTreeBuilder::processToken): 19 (WebCore::HTMLTreeBuilder::processDoctypeToken): 20 (WebCore::HTMLTreeBuilder::processStartTag): 21 (WebCore::HTMLTreeBuilder::processEndTag): 22 (WebCore::HTMLTreeBuilder::processComment): 23 (WebCore::HTMLTreeBuilder::processCharacter): 24 (WebCore::HTMLTreeBuilder::processEndOfFile): 25 (WebCore::HTMLTreeBuilder::processDefaultForInitialMode): 26 (WebCore::HTMLTreeBuilder::processDefaultForBeforeHTMLMode): 27 (WebCore::HTMLTreeBuilder::processDefaultForBeforeHeadMode): 28 (WebCore::HTMLTreeBuilder::processDefaultForInHeadMode): 29 (WebCore::HTMLTreeBuilder::processDefaultForInHeadNoscriptMode): 30 (WebCore::HTMLTreeBuilder::processDefaultForAfterHeadMode): 31 * html/HTMLTreeBuilder.h: 32 1 33 2010-06-25 Adam Barth <abarth@webkit.org> 2 34 -
trunk/WebCore/html/HTMLTreeBuilder.cpp
r61914 r61915 226 226 227 227 AtomicHTMLToken token(rawToken); 228 229 // HTML5 expects the tokenizer to call the parser every time a character is 230 // emitted. We instead collect characters and call the parser with a batch. 231 // In order to make our first-pass parser code simple, processToken matches 232 // the spec in only handling one character at a time. 233 if (token.type() == HTMLToken::Character) { 234 StringImpl* characters = token.characters().impl(); 235 // FIXME: Calling processToken for each character is probably slow. 236 for (unsigned i = 0; i < characters->length(); ++i) 237 processToken(token, (*characters)[i]); 228 return processToken(token); 229 } 230 231 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token) 232 { 233 switch (token.type()) { 234 case HTMLToken::Uninitialized: 235 ASSERT_NOT_REACHED(); 236 break; 237 case HTMLToken::DOCTYPE: 238 processDoctypeToken(token); 239 break; 240 case HTMLToken::StartTag: 241 processStartTag(token); 242 break; 243 case HTMLToken::EndTag: 244 processEndTag(token); 245 break; 246 case HTMLToken::Comment: 247 processComment(token); 238 248 return; 239 } 240 return processToken(token); 241 } 242 243 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token, UChar cc) 244 { 245 reprocessToken: 249 case HTMLToken::Character: 250 processCharacter(token); 251 break; 252 case HTMLToken::EndOfFile: 253 processEndOfFile(token); 254 break; 255 } 256 } 257 258 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token) 259 { 260 if (insertionMode() == InitialMode) { 261 insertDoctype(token); 262 return; 263 } 264 parseError(token); 265 } 266 267 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token) 268 { 246 269 switch (insertionMode()) { 247 case InitialMode: { 248 switch (token.type()) { 249 case HTMLToken::Uninitialized: 250 ASSERT_NOT_REACHED(); 251 break; 252 case HTMLToken::DOCTYPE: 253 insertDoctype(token); 254 return; 255 case HTMLToken::Comment: 256 insertComment(token); 257 return; 258 
case HTMLToken::Character: 259 if (isTreeBuilderWhiteSpace(cc)) 260 return; 261 break; 262 case HTMLToken::StartTag: 263 case HTMLToken::EndTag: 264 case HTMLToken::EndOfFile: 265 break; 266 } 267 notImplemented(); 268 parseError(token); 269 setInsertionMode(BeforeHTMLMode); 270 goto reprocessToken; 271 } 272 case BeforeHTMLMode: { 273 switch (token.type()) { 274 case HTMLToken::Uninitialized: 275 ASSERT_NOT_REACHED(); 276 break; 277 case HTMLToken::DOCTYPE: 278 parseError(token); 279 return; 280 case HTMLToken::Comment: 281 insertComment(token); 282 return; 283 case HTMLToken::Character: 284 if (isTreeBuilderWhiteSpace(cc)) 285 return; 286 break; 287 case HTMLToken::StartTag: 288 if (token.name() == htmlTag) { 289 notImplemented(); 290 setInsertionMode(BeforeHeadMode); 291 return; 292 } 293 break; 294 case HTMLToken::EndTag: 295 if (token.name() == headTag || token.name() == bodyTag || token.name() == htmlTag || token.name() == brTag) 296 break; 297 parseError(token); 298 return; 299 case HTMLToken::EndOfFile: 300 break; 301 } 302 notImplemented(); 303 setInsertionMode(BeforeHeadMode); 304 goto reprocessToken; 305 } 306 case BeforeHeadMode: { 307 switch (token.type()) { 308 case HTMLToken::Uninitialized: 309 ASSERT_NOT_REACHED(); 310 break; 311 case HTMLToken::Character: 312 if (isTreeBuilderWhiteSpace(cc)) 313 return; 314 break; 315 case HTMLToken::Comment: 316 insertComment(token); 317 return; 318 case HTMLToken::DOCTYPE: 319 parseError(token); 320 return; 321 case HTMLToken::StartTag: 322 if (token.name() == htmlTag) { 323 notImplemented(); 324 return; 325 } 326 if (token.name() == headTag) { 327 m_headElement = insertElement(token); 328 setInsertionMode(InHeadMode); 329 return; 330 } 331 break; 332 case HTMLToken::EndTag: 333 if (token.name() == headTag || token.name() == bodyTag || token.name() == brTag) { 334 AtomicHTMLToken fakeHead(HTMLToken::StartTag, headTag.localName()); 335 processToken(fakeHead); 336 goto reprocessToken; 337 } 338 parseError(token); 
339 return; 340 case HTMLToken::EndOfFile: 341 break; 342 } 343 AtomicHTMLToken fakeHead(HTMLToken::StartTag, headTag.localName()); 344 processToken(fakeHead); 345 goto reprocessToken; 346 } 347 case InHeadMode: { 348 switch (token.type()) { 349 case HTMLToken::Uninitialized: 350 ASSERT_NOT_REACHED(); 351 break; 352 case HTMLToken::Character: 353 insertCharacter(cc); 354 return; 355 case HTMLToken::Comment: 356 insertComment(token); 357 return; 358 case HTMLToken::DOCTYPE: 359 parseError(token); 360 return; 361 case HTMLToken::StartTag: 362 if (token.name() == htmlTag) { 363 notImplemented(); 364 return; 365 } 366 // FIXME: Atomize "command". 367 if (token.name() == baseTag || token.name() == "command" || token.name() == linkTag) { 368 insertElement(token); 369 m_openElements.pop(); 370 notImplemented(); 371 } 372 if (token.name() == metaTag) { 373 insertElement(token); 374 m_openElements.pop(); 375 notImplemented(); 376 } 377 if (token.name() == titleTag) { 378 insertGenericRCDATAElement(token); 379 return; 380 } 381 if (token.name() == noscriptTag) { 382 if (isScriptingFlagEnabled(m_document->frame())) { 383 insertGenericRawTextElement(token); 384 return; 385 } 386 insertElement(token); 387 setInsertionMode(InHeadNoscriptMode); 388 } 389 if (token.name() == noframesTag || token.name() == styleTag) { 270 case InitialMode: 271 ASSERT(insertionMode() == InitialMode); 272 processDefaultForInitialMode(token); 273 // Fall through. 274 case BeforeHTMLMode: 275 ASSERT(insertionMode() == BeforeHTMLMode); 276 if (token.name() == htmlTag) { 277 notImplemented(); 278 setInsertionMode(BeforeHeadMode); 279 return; 280 } 281 processDefaultForBeforeHTMLMode(token); 282 // Fall through. 
283 case BeforeHeadMode: 284 ASSERT(insertionMode() == BeforeHeadMode); 285 if (token.name() == htmlTag) { 286 notImplemented(); 287 return; 288 } 289 if (token.name() == headTag) { 290 m_headElement = insertElement(token); 291 setInsertionMode(InHeadMode); 292 return; 293 } 294 processDefaultForBeforeHeadMode(token); 295 // Fall through. 296 case InHeadMode: 297 ASSERT(insertionMode() == InHeadMode); 298 if (token.name() == htmlTag) { 299 notImplemented(); 300 return; 301 } 302 // FIXME: Atomize "command". 303 if (token.name() == baseTag || token.name() == "command" || token.name() == linkTag) { 304 insertElement(token); 305 m_openElements.pop(); 306 notImplemented(); 307 return; 308 } 309 if (token.name() == metaTag) { 310 insertElement(token); 311 m_openElements.pop(); 312 notImplemented(); 313 return; 314 } 315 if (token.name() == titleTag) { 316 insertGenericRCDATAElement(token); 317 return; 318 } 319 if (token.name() == noscriptTag) { 320 if (isScriptingFlagEnabled(m_document->frame())) { 390 321 insertGenericRawTextElement(token); 391 322 return; 392 323 } 393 if (token.name() == scriptTag) { 394 insertScriptElement(token); 395 return; 396 } 397 if (token.name() == headTag) { 398 notImplemented(); 399 return; 400 } 401 break; 402 case HTMLToken::EndTag: 403 if (token.name() == headTag) { 404 ASSERT(m_openElements.top()->tagQName() == headTag); 405 m_openElements.pop(); 406 setInsertionMode(AfterHeadMode); 407 return; 408 } 409 if (token.name() == bodyTag || token.name() == htmlTag || token.name() == brTag) 410 break; 411 parseError(token); 412 return; 413 break; 414 case HTMLToken::EndOfFile: 415 break; 416 } 417 AtomicHTMLToken fakeHead(HTMLToken::EndTag, headTag.localName()); 418 processToken(fakeHead); 419 goto reprocessToken; 420 } 421 case InHeadNoscriptMode: { 422 switch (token.type()) { 423 case HTMLToken::Uninitialized: 424 ASSERT_NOT_REACHED(); 425 break; 426 case HTMLToken::DOCTYPE: 427 parseError(token); 428 return; 429 case HTMLToken::StartTag: 
430 if (token.name() == htmlTag) { 431 notImplemented(); 432 return; 433 } 434 if (token.name() == linkTag || token.name() == metaTag || token.name() == noframesTag || token.name() == styleTag) { 435 notImplemented(); 436 return; 437 } 438 if (token.name() == htmlTag || token.name() == noscriptTag) { 439 parseError(token); 440 return; 441 } 442 break; 443 case HTMLToken::EndTag: 444 if (token.name() == noscriptTag) { 445 ASSERT(m_openElements.top()->tagQName() == noscriptTag); 446 m_openElements.pop(); 447 ASSERT(m_openElements.top()->tagQName() == headTag); 448 setInsertionMode(InHeadMode); 449 return; 450 } 451 if (token.name() == brTag) 452 break; 453 parseError(token); 454 return; 455 case HTMLToken::Character: 456 notImplemented(); 457 break; 458 case HTMLToken::Comment: 459 notImplemented(); 460 return; 461 case HTMLToken::EndOfFile: 462 break; 463 } 464 AtomicHTMLToken fakeNoscript(HTMLToken::EndTag, noscriptTag.localName()); 465 processToken(fakeNoscript); 466 goto reprocessToken; 467 } 468 case AfterHeadMode: { 469 switch (token.type()) { 470 case HTMLToken::Uninitialized: 471 ASSERT_NOT_REACHED(); 472 break; 473 case HTMLToken::Character: 474 if (isTreeBuilderWhiteSpace(cc)) { 475 insertCharacter(cc); 476 return; 477 } 478 break; 479 case HTMLToken::Comment: 480 insertComment(token); 481 return; 482 case HTMLToken::DOCTYPE: 483 parseError(token); 484 return; 485 case HTMLToken::StartTag: 486 if (token.name() == htmlTag) { 487 notImplemented(); 488 return; 489 } 490 if (token.name() == bodyTag) { 491 m_framesetOk = false; 492 insertElement(token); 493 return; 494 } 495 if (token.name() == framesetTag) { 496 insertElement(token); 497 setInsertionMode(InFramesetMode); 498 return; 499 } 500 if (token.name() == baseTag || token.name() == linkTag || token.name() == metaTag || token.name() == noframesTag || token.name() == scriptTag || token.name() == styleTag || token.name() == titleTag) { 501 parseError(token); 502 ASSERT(m_headElement); 503 
m_openElements.push(m_headElement.get()); 504 notImplemented(); 505 m_openElements.remove(m_headElement.get()); 506 return; 507 } 508 if (token.name() == headTag) { 509 parseError(token); 510 return; 511 } 512 break; 513 case HTMLToken::EndTag: 514 if (token.name() == bodyTag || token.name() == htmlTag || token.name() == brTag) 515 break; 516 parseError(token); 517 return; 518 case HTMLToken::EndOfFile: 519 break; 520 } 521 AtomicHTMLToken fakeBody(HTMLToken::StartTag, bodyTag.localName()); 522 processToken(fakeBody); 523 m_framesetOk = true; 524 goto reprocessToken; 525 } 324 insertElement(token); 325 setInsertionMode(InHeadNoscriptMode); 326 return; 327 } 328 if (token.name() == noframesTag || token.name() == styleTag) { 329 insertGenericRawTextElement(token); 330 return; 331 } 332 if (token.name() == scriptTag) { 333 insertScriptElement(token); 334 return; 335 } 336 if (token.name() == headTag) { 337 notImplemented(); 338 return; 339 } 340 processDefaultForInHeadMode(token); 341 // Fall through. 
342 case AfterHeadMode: 343 ASSERT(insertionMode() == AfterHeadMode); 344 if (token.name() == htmlTag) { 345 notImplemented(); 346 return; 347 } 348 if (token.name() == bodyTag) { 349 m_framesetOk = false; 350 insertElement(token); 351 return; 352 } 353 if (token.name() == framesetTag) { 354 insertElement(token); 355 setInsertionMode(InFramesetMode); 356 return; 357 } 358 if (token.name() == baseTag || token.name() == linkTag || token.name() == metaTag || token.name() == noframesTag || token.name() == scriptTag || token.name() == styleTag || token.name() == titleTag) { 359 parseError(token); 360 ASSERT(m_headElement); 361 m_openElements.push(m_headElement.get()); 362 notImplemented(); 363 m_openElements.remove(m_headElement.get()); 364 return; 365 } 366 if (token.name() == headTag) { 367 parseError(token); 368 return; 369 } 370 processDefaultForAfterHeadMode(token); 371 // Fall through 526 372 case InBodyMode: 527 case TextMode: 528 case InTableMode: 529 case InTableTextMode: 530 case InCaptionMode: 531 case InColumnGroupMode: 532 case InTableBodyMode: 533 case InRowMode: 534 case InCellMode: 535 case InSelectMode: 536 case InSelectInTableMode: 537 case InForeignContentMode: 538 case AfterBodyMode: 539 case InFramesetMode: 540 case AfterFramesetMode: 541 case AfterAfterBodyMode: 542 case AfterAfterFramesetMode: 543 notImplemented(); 544 ASSERT_NOT_REACHED(); 545 } 546 ASSERT_NOT_REACHED(); 373 ASSERT(insertionMode() == InBodyMode); 374 notImplemented(); 375 break; 376 case InHeadNoscriptMode: 377 ASSERT(insertionMode() == InHeadNoscriptMode); 378 if (token.name() == htmlTag) { 379 notImplemented(); 380 return; 381 } 382 if (token.name() == linkTag || token.name() == metaTag || token.name() == noframesTag || token.name() == styleTag) { 383 notImplemented(); 384 return; 385 } 386 if (token.name() == htmlTag || token.name() == noscriptTag) { 387 parseError(token); 388 return; 389 } 390 processDefaultForInHeadNoscriptMode(token); 391 processToken(token); 392 default: 
393 notImplemented(); 394 } 395 } 396 397 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token) 398 { 399 switch (insertionMode()) { 400 case InitialMode: 401 ASSERT(insertionMode() == InitialMode); 402 processDefaultForInitialMode(token); 403 // Fall through. 404 case BeforeHTMLMode: 405 ASSERT(insertionMode() == BeforeHTMLMode); 406 if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) { 407 parseError(token); 408 return; 409 } 410 processDefaultForBeforeHTMLMode(token); 411 // Fall through. 412 case BeforeHeadMode: 413 ASSERT(insertionMode() == BeforeHeadMode); 414 if (token.name() != headTag && token.name() != bodyTag && token.name() != brTag) { 415 parseError(token); 416 return; 417 } 418 processDefaultForBeforeHeadMode(token); 419 // Fall through. 420 case InHeadMode: 421 ASSERT(insertionMode() == InHeadMode); 422 if (token.name() == headTag) { 423 ASSERT(m_openElements.top()->tagQName() == headTag); 424 m_openElements.pop(); 425 setInsertionMode(AfterHeadMode); 426 return; 427 } 428 if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) { 429 parseError(token); 430 return; 431 } 432 processDefaultForInHeadMode(token); 433 // Fall through. 
434 case AfterHeadMode: 435 ASSERT(insertionMode() == AfterHeadMode); 436 if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) { 437 parseError(token); 438 return; 439 } 440 processDefaultForAfterHeadMode(token); 441 // Fall through 442 case InBodyMode: 443 ASSERT(insertionMode() == InBodyMode); 444 notImplemented(); 445 break; 446 case InHeadNoscriptMode: 447 ASSERT(insertionMode() == InHeadNoscriptMode); 448 if (token.name() == noscriptTag) { 449 ASSERT(m_openElements.top()->tagQName() == noscriptTag); 450 m_openElements.pop(); 451 ASSERT(m_openElements.top()->tagQName() == headTag); 452 setInsertionMode(InHeadMode); 453 return; 454 } 455 if (token.name() != brTag) { 456 parseError(token); 457 return; 458 } 459 processDefaultForInHeadNoscriptMode(token); 460 processToken(token); 461 default: 462 notImplemented(); 463 } 464 } 465 466 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token) 467 { 468 if (insertionMode() == InHeadNoscriptMode) { 469 notImplemented(); 470 return; 471 } 472 insertComment(token); 473 } 474 475 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken&) 476 { 477 // FIXME: We need to figure out how to handle each character individually. 478 notImplemented(); 479 } 480 481 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token) 482 { 483 switch (insertionMode()) { 484 case InitialMode: 485 ASSERT(insertionMode() == InitialMode); 486 processDefaultForInitialMode(token); 487 // Fall through. 488 case BeforeHTMLMode: 489 ASSERT(insertionMode() == BeforeHTMLMode); 490 processDefaultForBeforeHTMLMode(token); 491 // Fall through. 492 case BeforeHeadMode: 493 ASSERT(insertionMode() == BeforeHeadMode); 494 processDefaultForBeforeHeadMode(token); 495 // Fall through. 496 case InHeadMode: 497 ASSERT(insertionMode() == InHeadMode); 498 processDefaultForInHeadMode(token); 499 // Fall through. 
500 case AfterHeadMode: 501 ASSERT(insertionMode() == AfterHeadMode); 502 processDefaultForAfterHeadMode(token); 503 // Fall through 504 case InBodyMode: 505 ASSERT(insertionMode() == InBodyMode); 506 notImplemented(); 507 break; 508 case InHeadNoscriptMode: 509 ASSERT(insertionMode() == InHeadNoscriptMode); 510 processDefaultForInHeadNoscriptMode(token); 511 processToken(token); 512 default: 513 notImplemented(); 514 } 515 } 516 517 void HTMLTreeBuilder::processDefaultForInitialMode(AtomicHTMLToken& token) 518 { 519 notImplemented(); 520 parseError(token); 521 setInsertionMode(BeforeHTMLMode); 522 } 523 524 void HTMLTreeBuilder::processDefaultForBeforeHTMLMode(AtomicHTMLToken&) 525 { 526 notImplemented(); 527 setInsertionMode(BeforeHeadMode); 528 } 529 530 void HTMLTreeBuilder::processDefaultForBeforeHeadMode(AtomicHTMLToken&) 531 { 532 AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName()); 533 processStartTag(startHead); 534 } 535 536 void HTMLTreeBuilder::processDefaultForInHeadMode(AtomicHTMLToken&) 537 { 538 AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName()); 539 processEndTag(endHead); 540 } 541 542 void HTMLTreeBuilder::processDefaultForInHeadNoscriptMode(AtomicHTMLToken&) 543 { 544 AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName()); 545 processEndTag(endNoscript); 546 } 547 548 void HTMLTreeBuilder::processDefaultForAfterHeadMode(AtomicHTMLToken&) 549 { 550 AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName()); 551 processStartTag(startBody); 552 m_framesetOk = true; 547 553 } 548 554 -
trunk/WebCore/html/HTMLTreeBuilder.h
r61914 r61915 109 109 110 110 void passTokenToLegacyParser(HTMLToken&); 111 void processToken(AtomicHTMLToken&, UChar cc = 0); 111 112 // Specialized functions for processing the different types of tokens. 113 void processToken(AtomicHTMLToken&); 114 void processDoctypeToken(AtomicHTMLToken&); 115 void processStartTag(AtomicHTMLToken&); 116 void processEndTag(AtomicHTMLToken&); 117 void processComment(AtomicHTMLToken&); 118 void processCharacter(AtomicHTMLToken&); 119 void processEndOfFile(AtomicHTMLToken&); 120 121 // Default processing for the different insertion modes. 122 void processDefaultForInitialMode(AtomicHTMLToken&); 123 void processDefaultForBeforeHTMLMode(AtomicHTMLToken&); 124 void processDefaultForBeforeHeadMode(AtomicHTMLToken&); 125 void processDefaultForInHeadMode(AtomicHTMLToken&); 126 void processDefaultForInHeadNoscriptMode(AtomicHTMLToken&); 127 void processDefaultForAfterHeadMode(AtomicHTMLToken&); 112 128 113 129 void insertDoctype(AtomicHTMLToken&);
Note: See TracChangeset
for help on using the changeset viewer.