blob: d23e05e064227bfc670a4bdfe3a36509b81a5e21 [file] [log] [blame]
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "errors"
9 "fmt"
10 "io"
11 "strings"
12
13 a "golang.org/x/net/html/atom"
14)
15
16// A parser implements the HTML5 parsing algorithm:
17// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
18type parser struct {
19 // tokenizer provides the tokens for the parser.
20 tokenizer *Tokenizer
21 // tok is the most recently read token.
22 tok Token
23 // Self-closing tags like <hr/> are treated as start tags, except that
24 // hasSelfClosingToken is set while they are being processed.
25 hasSelfClosingToken bool
26 // doc is the document root element.
27 doc *Node
28 // The stack of open elements (section 12.2.4.2) and active formatting
29 // elements (section 12.2.4.3).
30 oe, afe nodeStack
31 // Element pointers (section 12.2.4.4).
32 head, form *Node
33 // Other parsing state flags (section 12.2.4.5).
34 scripting, framesetOK bool
35 // The stack of template insertion modes
36 templateStack insertionModeStack
37 // im is the current insertion mode.
38 im insertionMode
39 // originalIM is the insertion mode to go back to after completing a text
40 // or inTableText insertion mode.
41 originalIM insertionMode
42 // fosterParenting is whether new elements should be inserted according to
43 // the foster parenting rules (section 12.2.6.1).
44 fosterParenting bool
45 // quirks is whether the parser is operating in "quirks mode."
46 quirks bool
47 // fragment is whether the parser is parsing an HTML fragment.
48 fragment bool
49 // context is the context element when parsing an HTML fragment
50 // (section 12.4).
51 context *Node
52}
53
54func (p *parser) top() *Node {
55 if n := p.oe.top(); n != nil {
56 return n
57 }
58 return p.doc
59}
60
61// Stop tags for use in popUntil. These come from section 12.2.4.2.
62var (
63 defaultScopeStopTags = map[string][]a.Atom{
64 "": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
65 "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
66 "svg": {a.Desc, a.ForeignObject, a.Title},
67 }
68)
69
// scope selects which set of stop tags bounds a search of the stack of open
// elements.
type scope int

const (
	// defaultScope stops only at the per-namespace defaultScopeStopTags.
	defaultScope scope = iota
	// listItemScope is defaultScope plus the HTML tags ol and ul.
	listItemScope
	// buttonScope is defaultScope plus the HTML tag button.
	buttonScope
	// tableScope stops at the HTML tags html, table and template.
	tableScope
	// tableRowScope is accepted by clearStackToContext only.
	tableRowScope
	// tableBodyScope is accepted by clearStackToContext only.
	tableBodyScope
	// selectScope stops at every HTML tag other than optgroup and option.
	selectScope
)
81
82// popUntil pops the stack of open elements at the highest element whose tag
83// is in matchTags, provided there is no higher element in the scope's stop
84// tags (as defined in section 12.2.4.2). It returns whether or not there was
85// such an element. If there was not, popUntil leaves the stack unchanged.
86//
87// For example, the set of stop tags for table scope is: "html", "table". If
88// the stack was:
89// ["html", "body", "font", "table", "b", "i", "u"]
90// then popUntil(tableScope, "font") would return false, but
91// popUntil(tableScope, "i") would return true and the stack would become:
92// ["html", "body", "font", "table", "b"]
93//
94// If an element's tag is in both the stop tags and matchTags, then the stack
95// will be popped and the function returns true (provided, of course, there was
96// no higher element in the stack that was also in the stop tags). For example,
97// popUntil(tableScope, "table") returns true and leaves:
98// ["html", "body", "font"]
99func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
100 if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
101 p.oe = p.oe[:i]
102 return true
103 }
104 return false
105}
106
107// indexOfElementInScope returns the index in p.oe of the highest element whose
108// tag is in matchTags that is in scope. If no matching element is in scope, it
109// returns -1.
110func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
111 for i := len(p.oe) - 1; i >= 0; i-- {
112 tagAtom := p.oe[i].DataAtom
113 if p.oe[i].Namespace == "" {
114 for _, t := range matchTags {
115 if t == tagAtom {
116 return i
117 }
118 }
119 switch s {
120 case defaultScope:
121 // No-op.
122 case listItemScope:
123 if tagAtom == a.Ol || tagAtom == a.Ul {
124 return -1
125 }
126 case buttonScope:
127 if tagAtom == a.Button {
128 return -1
129 }
130 case tableScope:
131 if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
132 return -1
133 }
134 case selectScope:
135 if tagAtom != a.Optgroup && tagAtom != a.Option {
136 return -1
137 }
138 default:
139 panic("unreachable")
140 }
141 }
142 switch s {
143 case defaultScope, listItemScope, buttonScope:
144 for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
145 if t == tagAtom {
146 return -1
147 }
148 }
149 }
150 }
151 return -1
152}
153
154// elementInScope is like popUntil, except that it doesn't modify the stack of
155// open elements.
156func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
157 return p.indexOfElementInScope(s, matchTags...) != -1
158}
159
160// clearStackToContext pops elements off the stack of open elements until a
161// scope-defined element is found.
162func (p *parser) clearStackToContext(s scope) {
163 for i := len(p.oe) - 1; i >= 0; i-- {
164 tagAtom := p.oe[i].DataAtom
165 switch s {
166 case tableScope:
167 if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
168 p.oe = p.oe[:i+1]
169 return
170 }
171 case tableRowScope:
172 if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
173 p.oe = p.oe[:i+1]
174 return
175 }
176 case tableBodyScope:
177 if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
178 p.oe = p.oe[:i+1]
179 return
180 }
181 default:
182 panic("unreachable")
183 }
184 }
185}
186
187// generateImpliedEndTags pops nodes off the stack of open elements as long as
188// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
189// If exceptions are specified, nodes with that name will not be popped off.
190func (p *parser) generateImpliedEndTags(exceptions ...string) {
191 var i int
192loop:
193 for i = len(p.oe) - 1; i >= 0; i-- {
194 n := p.oe[i]
195 if n.Type == ElementNode {
196 switch n.DataAtom {
197 case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
198 for _, except := range exceptions {
199 if n.Data == except {
200 break loop
201 }
202 }
203 continue
204 }
205 }
206 break
207 }
208
209 p.oe = p.oe[:i+1]
210}
211
212// generateAllImpliedEndTags pops nodes off the stack of open elements as long as
213// the top node has a tag name of caption, colgroup, dd, div, dt, li, optgroup, option, p, rb,
214// rp, rt, rtc, span, tbody, td, tfoot, th, thead or tr.
215func (p *parser) generateAllImpliedEndTags() {
216 var i int
217 for i = len(p.oe) - 1; i >= 0; i-- {
218 n := p.oe[i]
219 if n.Type == ElementNode {
220 switch n.DataAtom {
221 // TODO: remove this divergence from the HTML5 spec
222 case a.Caption, a.Colgroup, a.Dd, a.Div, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb,
223 a.Rp, a.Rt, a.Rtc, a.Span, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
224 continue
225 }
226 }
227 break
228 }
229
230 p.oe = p.oe[:i+1]
231}
232
233// addChild adds a child node n to the top element, and pushes n onto the stack
234// of open elements if it is an element node.
235func (p *parser) addChild(n *Node) {
236 if p.shouldFosterParent() {
237 p.fosterParent(n)
238 } else {
239 p.top().AppendChild(n)
240 }
241
242 if n.Type == ElementNode {
243 p.oe = append(p.oe, n)
244 }
245}
246
247// shouldFosterParent returns whether the next node to be added should be
248// foster parented.
249func (p *parser) shouldFosterParent() bool {
250 if p.fosterParenting {
251 switch p.top().DataAtom {
252 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
253 return true
254 }
255 }
256 return false
257}
258
259// fosterParent adds a child node according to the foster parenting rules.
260// Section 12.2.6.1, "foster parenting".
261func (p *parser) fosterParent(n *Node) {
262 var table, parent, prev, template *Node
263 var i int
264 for i = len(p.oe) - 1; i >= 0; i-- {
265 if p.oe[i].DataAtom == a.Table {
266 table = p.oe[i]
267 break
268 }
269 }
270
271 var j int
272 for j = len(p.oe) - 1; j >= 0; j-- {
273 if p.oe[j].DataAtom == a.Template {
274 template = p.oe[j]
275 break
276 }
277 }
278
279 if template != nil && (table == nil || j < i) {
280 template.AppendChild(n)
281 return
282 }
283
284 if table == nil {
285 // The foster parent is the html element.
286 parent = p.oe[0]
287 } else {
288 parent = table.Parent
289 }
290 if parent == nil {
291 parent = p.oe[i-1]
292 }
293
294 if table != nil {
295 prev = table.PrevSibling
296 } else {
297 prev = parent.LastChild
298 }
299 if prev != nil && prev.Type == TextNode && n.Type == TextNode {
300 prev.Data += n.Data
301 return
302 }
303
304 parent.InsertBefore(n, table)
305}
306
307// addText adds text to the preceding node if it is a text node, or else it
308// calls addChild with a new text node.
309func (p *parser) addText(text string) {
310 if text == "" {
311 return
312 }
313
314 if p.shouldFosterParent() {
315 p.fosterParent(&Node{
316 Type: TextNode,
317 Data: text,
318 })
319 return
320 }
321
322 t := p.top()
323 if n := t.LastChild; n != nil && n.Type == TextNode {
324 n.Data += text
325 return
326 }
327 p.addChild(&Node{
328 Type: TextNode,
329 Data: text,
330 })
331}
332
333// addElement adds a child element based on the current token.
334func (p *parser) addElement() {
335 p.addChild(&Node{
336 Type: ElementNode,
337 DataAtom: p.tok.DataAtom,
338 Data: p.tok.Data,
339 Attr: p.tok.Attr,
340 })
341}
342
343// Section 12.2.4.3.
344func (p *parser) addFormattingElement() {
345 tagAtom, attr := p.tok.DataAtom, p.tok.Attr
346 p.addElement()
347
348 // Implement the Noah's Ark clause, but with three per family instead of two.
349 identicalElements := 0
350findIdenticalElements:
351 for i := len(p.afe) - 1; i >= 0; i-- {
352 n := p.afe[i]
353 if n.Type == scopeMarkerNode {
354 break
355 }
356 if n.Type != ElementNode {
357 continue
358 }
359 if n.Namespace != "" {
360 continue
361 }
362 if n.DataAtom != tagAtom {
363 continue
364 }
365 if len(n.Attr) != len(attr) {
366 continue
367 }
368 compareAttributes:
369 for _, t0 := range n.Attr {
370 for _, t1 := range attr {
371 if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
372 // Found a match for this attribute, continue with the next attribute.
373 continue compareAttributes
374 }
375 }
376 // If we get here, there is no attribute that matches a.
377 // Therefore the element is not identical to the new one.
378 continue findIdenticalElements
379 }
380
381 identicalElements++
382 if identicalElements >= 3 {
383 p.afe.remove(n)
384 }
385 }
386
387 p.afe = append(p.afe, p.top())
388}
389
390// Section 12.2.4.3.
391func (p *parser) clearActiveFormattingElements() {
392 for {
393 n := p.afe.pop()
394 if len(p.afe) == 0 || n.Type == scopeMarkerNode {
395 return
396 }
397 }
398}
399
400// Section 12.2.4.3.
401func (p *parser) reconstructActiveFormattingElements() {
402 n := p.afe.top()
403 if n == nil {
404 return
405 }
406 if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
407 return
408 }
409 i := len(p.afe) - 1
410 for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
411 if i == 0 {
412 i = -1
413 break
414 }
415 i--
416 n = p.afe[i]
417 }
418 for {
419 i++
420 clone := p.afe[i].clone()
421 p.addChild(clone)
422 p.afe[i] = clone
423 if i == len(p.afe)-1 {
424 break
425 }
426 }
427}
428
429// Section 12.2.5.
430func (p *parser) acknowledgeSelfClosingTag() {
431 p.hasSelfClosingToken = false
432}
433
434// An insertion mode (section 12.2.4.1) is the state transition function from
435// a particular state in the HTML5 parser's state machine. It updates the
436// parser's fields depending on parser.tok (where ErrorToken means EOF).
437// It returns whether the token was consumed.
438type insertionMode func(*parser) bool
439
440// setOriginalIM sets the insertion mode to return to after completing a text or
441// inTableText insertion mode.
442// Section 12.2.4.1, "using the rules for".
443func (p *parser) setOriginalIM() {
444 if p.originalIM != nil {
445 panic("html: bad parser state: originalIM was set twice")
446 }
447 p.originalIM = p.im
448}
449
450// Section 12.2.4.1, "reset the insertion mode".
451func (p *parser) resetInsertionMode() {
452 for i := len(p.oe) - 1; i >= 0; i-- {
453 n := p.oe[i]
454 last := i == 0
455 if last && p.context != nil {
456 n = p.context
457 }
458
459 switch n.DataAtom {
460 case a.Select:
461 if !last {
462 for ancestor, first := n, p.oe[0]; ancestor != first; {
463 if ancestor == first {
464 break
465 }
466 ancestor = p.oe[p.oe.index(ancestor)-1]
467 switch ancestor.DataAtom {
468 case a.Template:
469 p.im = inSelectIM
470 return
471 case a.Table:
472 p.im = inSelectInTableIM
473 return
474 }
475 }
476 }
477 p.im = inSelectIM
478 case a.Td, a.Th:
479 // TODO: remove this divergence from the HTML5 spec.
480 //
481 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
482 p.im = inCellIM
483 case a.Tr:
484 p.im = inRowIM
485 case a.Tbody, a.Thead, a.Tfoot:
486 p.im = inTableBodyIM
487 case a.Caption:
488 p.im = inCaptionIM
489 case a.Colgroup:
490 p.im = inColumnGroupIM
491 case a.Table:
492 p.im = inTableIM
493 case a.Template:
494 p.im = p.templateStack.top()
495 case a.Head:
496 // TODO: remove this divergence from the HTML5 spec.
497 //
498 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
499 p.im = inHeadIM
500 case a.Body:
501 p.im = inBodyIM
502 case a.Frameset:
503 p.im = inFramesetIM
504 case a.Html:
505 if p.head == nil {
506 p.im = beforeHeadIM
507 } else {
508 p.im = afterHeadIM
509 }
510 default:
511 if last {
512 p.im = inBodyIM
513 return
514 }
515 continue
516 }
517 return
518 }
519}
520
// whitespace is the cutset passed to strings.TrimLeft by the insertion modes
// to strip leading whitespace from text tokens.
const whitespace = " \t\r\n\f"
522
523// Section 12.2.6.4.1.
524func initialIM(p *parser) bool {
525 switch p.tok.Type {
526 case TextToken:
527 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
528 if len(p.tok.Data) == 0 {
529 // It was all whitespace, so ignore it.
530 return true
531 }
532 case CommentToken:
533 p.doc.AppendChild(&Node{
534 Type: CommentNode,
535 Data: p.tok.Data,
536 })
537 return true
538 case DoctypeToken:
539 n, quirks := parseDoctype(p.tok.Data)
540 p.doc.AppendChild(n)
541 p.quirks = quirks
542 p.im = beforeHTMLIM
543 return true
544 }
545 p.quirks = true
546 p.im = beforeHTMLIM
547 return false
548}
549
550// Section 12.2.6.4.2.
551func beforeHTMLIM(p *parser) bool {
552 switch p.tok.Type {
553 case DoctypeToken:
554 // Ignore the token.
555 return true
556 case TextToken:
557 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
558 if len(p.tok.Data) == 0 {
559 // It was all whitespace, so ignore it.
560 return true
561 }
562 case StartTagToken:
563 if p.tok.DataAtom == a.Html {
564 p.addElement()
565 p.im = beforeHeadIM
566 return true
567 }
568 case EndTagToken:
569 switch p.tok.DataAtom {
570 case a.Head, a.Body, a.Html, a.Br:
571 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
572 return false
573 default:
574 // Ignore the token.
575 return true
576 }
577 case CommentToken:
578 p.doc.AppendChild(&Node{
579 Type: CommentNode,
580 Data: p.tok.Data,
581 })
582 return true
583 }
584 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
585 return false
586}
587
588// Section 12.2.6.4.3.
589func beforeHeadIM(p *parser) bool {
590 switch p.tok.Type {
591 case TextToken:
592 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
593 if len(p.tok.Data) == 0 {
594 // It was all whitespace, so ignore it.
595 return true
596 }
597 case StartTagToken:
598 switch p.tok.DataAtom {
599 case a.Head:
600 p.addElement()
601 p.head = p.top()
602 p.im = inHeadIM
603 return true
604 case a.Html:
605 return inBodyIM(p)
606 }
607 case EndTagToken:
608 switch p.tok.DataAtom {
609 case a.Head, a.Body, a.Html, a.Br:
610 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
611 return false
612 default:
613 // Ignore the token.
614 return true
615 }
616 case CommentToken:
617 p.addChild(&Node{
618 Type: CommentNode,
619 Data: p.tok.Data,
620 })
621 return true
622 case DoctypeToken:
623 // Ignore the token.
624 return true
625 }
626
627 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
628 return false
629}
630
631// Section 12.2.6.4.4.
632func inHeadIM(p *parser) bool {
633 switch p.tok.Type {
634 case TextToken:
635 s := strings.TrimLeft(p.tok.Data, whitespace)
636 if len(s) < len(p.tok.Data) {
637 // Add the initial whitespace to the current node.
638 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
639 if s == "" {
640 return true
641 }
642 p.tok.Data = s
643 }
644 case StartTagToken:
645 switch p.tok.DataAtom {
646 case a.Html:
647 return inBodyIM(p)
648 case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
649 p.addElement()
650 p.oe.pop()
651 p.acknowledgeSelfClosingTag()
652 return true
653 case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
654 p.addElement()
655 p.setOriginalIM()
656 p.im = textIM
657 return true
658 case a.Head:
659 // Ignore the token.
660 return true
661 case a.Template:
662 p.addElement()
663 p.afe = append(p.afe, &scopeMarker)
664 p.framesetOK = false
665 p.im = inTemplateIM
666 p.templateStack = append(p.templateStack, inTemplateIM)
667 return true
668 }
669 case EndTagToken:
670 switch p.tok.DataAtom {
671 case a.Head:
672 p.oe.pop()
673 p.im = afterHeadIM
674 return true
675 case a.Body, a.Html, a.Br:
676 p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
677 return false
678 case a.Template:
679 if !p.oe.contains(a.Template) {
680 return true
681 }
682 p.generateAllImpliedEndTags()
683 if n := p.oe.top(); n.DataAtom != a.Template {
684 return true
685 }
686 p.popUntil(defaultScope, a.Template)
687 p.clearActiveFormattingElements()
688 p.templateStack.pop()
689 p.resetInsertionMode()
690 return true
691 default:
692 // Ignore the token.
693 return true
694 }
695 case CommentToken:
696 p.addChild(&Node{
697 Type: CommentNode,
698 Data: p.tok.Data,
699 })
700 return true
701 case DoctypeToken:
702 // Ignore the token.
703 return true
704 }
705
706 p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
707 return false
708}
709
710// Section 12.2.6.4.6.
711func afterHeadIM(p *parser) bool {
712 switch p.tok.Type {
713 case TextToken:
714 s := strings.TrimLeft(p.tok.Data, whitespace)
715 if len(s) < len(p.tok.Data) {
716 // Add the initial whitespace to the current node.
717 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
718 if s == "" {
719 return true
720 }
721 p.tok.Data = s
722 }
723 case StartTagToken:
724 switch p.tok.DataAtom {
725 case a.Html:
726 return inBodyIM(p)
727 case a.Body:
728 p.addElement()
729 p.framesetOK = false
730 p.im = inBodyIM
731 return true
732 case a.Frameset:
733 p.addElement()
734 p.im = inFramesetIM
735 return true
736 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
737 p.oe = append(p.oe, p.head)
738 defer p.oe.remove(p.head)
739 return inHeadIM(p)
740 case a.Head:
741 // Ignore the token.
742 return true
743 }
744 case EndTagToken:
745 switch p.tok.DataAtom {
746 case a.Body, a.Html, a.Br:
747 // Drop down to creating an implied <body> tag.
748 case a.Template:
749 return inHeadIM(p)
750 default:
751 // Ignore the token.
752 return true
753 }
754 case CommentToken:
755 p.addChild(&Node{
756 Type: CommentNode,
757 Data: p.tok.Data,
758 })
759 return true
760 case DoctypeToken:
761 // Ignore the token.
762 return true
763 }
764
765 p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
766 p.framesetOK = true
767 return false
768}
769
770// copyAttributes copies attributes of src not found on dst to dst.
771func copyAttributes(dst *Node, src Token) {
772 if len(src.Attr) == 0 {
773 return
774 }
775 attr := map[string]string{}
776 for _, t := range dst.Attr {
777 attr[t.Key] = t.Val
778 }
779 for _, t := range src.Attr {
780 if _, ok := attr[t.Key]; !ok {
781 dst.Attr = append(dst.Attr, t)
782 attr[t.Key] = t.Val
783 }
784 }
785}
786
787// Section 12.2.6.4.7.
788func inBodyIM(p *parser) bool {
789 switch p.tok.Type {
790 case TextToken:
791 d := p.tok.Data
792 switch n := p.oe.top(); n.DataAtom {
793 case a.Pre, a.Listing:
794 if n.FirstChild == nil {
795 // Ignore a newline at the start of a <pre> block.
796 if d != "" && d[0] == '\r' {
797 d = d[1:]
798 }
799 if d != "" && d[0] == '\n' {
800 d = d[1:]
801 }
802 }
803 }
804 d = strings.Replace(d, "\x00", "", -1)
805 if d == "" {
806 return true
807 }
808 p.reconstructActiveFormattingElements()
809 p.addText(d)
810 if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
811 // There were non-whitespace characters inserted.
812 p.framesetOK = false
813 }
814 case StartTagToken:
815 switch p.tok.DataAtom {
816 case a.Html:
817 if p.oe.contains(a.Template) {
818 return true
819 }
820 copyAttributes(p.oe[0], p.tok)
821 case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
822 return inHeadIM(p)
823 case a.Body:
824 if p.oe.contains(a.Template) {
825 return true
826 }
827 if len(p.oe) >= 2 {
828 body := p.oe[1]
829 if body.Type == ElementNode && body.DataAtom == a.Body {
830 p.framesetOK = false
831 copyAttributes(body, p.tok)
832 }
833 }
834 case a.Frameset:
835 if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
836 // Ignore the token.
837 return true
838 }
839 body := p.oe[1]
840 if body.Parent != nil {
841 body.Parent.RemoveChild(body)
842 }
843 p.oe = p.oe[:1]
844 p.addElement()
845 p.im = inFramesetIM
846 return true
847 case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
848 p.popUntil(buttonScope, a.P)
849 p.addElement()
850 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
851 p.popUntil(buttonScope, a.P)
852 switch n := p.top(); n.DataAtom {
853 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
854 p.oe.pop()
855 }
856 p.addElement()
857 case a.Pre, a.Listing:
858 p.popUntil(buttonScope, a.P)
859 p.addElement()
860 // The newline, if any, will be dealt with by the TextToken case.
861 p.framesetOK = false
862 case a.Form:
863 if p.oe.contains(a.Template) || p.form == nil {
864 p.popUntil(buttonScope, a.P)
865 p.addElement()
866 p.form = p.top()
867 }
868 case a.Li:
869 p.framesetOK = false
870 for i := len(p.oe) - 1; i >= 0; i-- {
871 node := p.oe[i]
872 switch node.DataAtom {
873 case a.Li:
874 p.oe = p.oe[:i]
875 case a.Address, a.Div, a.P:
876 continue
877 default:
878 if !isSpecialElement(node) {
879 continue
880 }
881 }
882 break
883 }
884 p.popUntil(buttonScope, a.P)
885 p.addElement()
886 case a.Dd, a.Dt:
887 p.framesetOK = false
888 for i := len(p.oe) - 1; i >= 0; i-- {
889 node := p.oe[i]
890 switch node.DataAtom {
891 case a.Dd, a.Dt:
892 p.oe = p.oe[:i]
893 case a.Address, a.Div, a.P:
894 continue
895 default:
896 if !isSpecialElement(node) {
897 continue
898 }
899 }
900 break
901 }
902 p.popUntil(buttonScope, a.P)
903 p.addElement()
904 case a.Plaintext:
905 p.popUntil(buttonScope, a.P)
906 p.addElement()
907 case a.Button:
908 p.popUntil(defaultScope, a.Button)
909 p.reconstructActiveFormattingElements()
910 p.addElement()
911 p.framesetOK = false
912 case a.A:
913 for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
914 if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
915 p.inBodyEndTagFormatting(a.A)
916 p.oe.remove(n)
917 p.afe.remove(n)
918 break
919 }
920 }
921 p.reconstructActiveFormattingElements()
922 p.addFormattingElement()
923 case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
924 p.reconstructActiveFormattingElements()
925 p.addFormattingElement()
926 case a.Nobr:
927 p.reconstructActiveFormattingElements()
928 if p.elementInScope(defaultScope, a.Nobr) {
929 p.inBodyEndTagFormatting(a.Nobr)
930 p.reconstructActiveFormattingElements()
931 }
932 p.addFormattingElement()
933 case a.Applet, a.Marquee, a.Object:
934 p.reconstructActiveFormattingElements()
935 p.addElement()
936 p.afe = append(p.afe, &scopeMarker)
937 p.framesetOK = false
938 case a.Table:
939 if !p.quirks {
940 p.popUntil(buttonScope, a.P)
941 }
942 p.addElement()
943 p.framesetOK = false
944 p.im = inTableIM
945 return true
946 case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
947 p.reconstructActiveFormattingElements()
948 p.addElement()
949 p.oe.pop()
950 p.acknowledgeSelfClosingTag()
951 if p.tok.DataAtom == a.Input {
952 for _, t := range p.tok.Attr {
953 if t.Key == "type" {
954 if strings.ToLower(t.Val) == "hidden" {
955 // Skip setting framesetOK = false
956 return true
957 }
958 }
959 }
960 }
961 p.framesetOK = false
962 case a.Param, a.Source, a.Track:
963 p.addElement()
964 p.oe.pop()
965 p.acknowledgeSelfClosingTag()
966 case a.Hr:
967 p.popUntil(buttonScope, a.P)
968 p.addElement()
969 p.oe.pop()
970 p.acknowledgeSelfClosingTag()
971 p.framesetOK = false
972 case a.Image:
973 p.tok.DataAtom = a.Img
974 p.tok.Data = a.Img.String()
975 return false
976 case a.Isindex:
977 if p.form != nil {
978 // Ignore the token.
979 return true
980 }
981 action := ""
982 prompt := "This is a searchable index. Enter search keywords: "
983 attr := []Attribute{{Key: "name", Val: "isindex"}}
984 for _, t := range p.tok.Attr {
985 switch t.Key {
986 case "action":
987 action = t.Val
988 case "name":
989 // Ignore the attribute.
990 case "prompt":
991 prompt = t.Val
992 default:
993 attr = append(attr, t)
994 }
995 }
996 p.acknowledgeSelfClosingTag()
997 p.popUntil(buttonScope, a.P)
998 p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
999 if action != "" {
1000 p.form.Attr = []Attribute{{Key: "action", Val: action}}
1001 }
1002 p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
1003 p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
1004 p.addText(prompt)
1005 p.addChild(&Node{
1006 Type: ElementNode,
1007 DataAtom: a.Input,
1008 Data: a.Input.String(),
1009 Attr: attr,
1010 })
1011 p.oe.pop()
1012 p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
1013 p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
1014 p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
1015 case a.Textarea:
1016 p.addElement()
1017 p.setOriginalIM()
1018 p.framesetOK = false
1019 p.im = textIM
1020 case a.Xmp:
1021 p.popUntil(buttonScope, a.P)
1022 p.reconstructActiveFormattingElements()
1023 p.framesetOK = false
1024 p.addElement()
1025 p.setOriginalIM()
1026 p.im = textIM
1027 case a.Iframe:
1028 p.framesetOK = false
1029 p.addElement()
1030 p.setOriginalIM()
1031 p.im = textIM
1032 case a.Noembed, a.Noscript:
1033 p.addElement()
1034 p.setOriginalIM()
1035 p.im = textIM
1036 case a.Select:
1037 p.reconstructActiveFormattingElements()
1038 p.addElement()
1039 p.framesetOK = false
1040 p.im = inSelectIM
1041 return true
1042 case a.Optgroup, a.Option:
1043 if p.top().DataAtom == a.Option {
1044 p.oe.pop()
1045 }
1046 p.reconstructActiveFormattingElements()
1047 p.addElement()
1048 case a.Rb, a.Rtc:
1049 if p.elementInScope(defaultScope, a.Ruby) {
1050 p.generateImpliedEndTags()
1051 }
1052 p.addElement()
1053 case a.Rp, a.Rt:
1054 if p.elementInScope(defaultScope, a.Ruby) {
1055 p.generateImpliedEndTags("rtc")
1056 }
1057 p.addElement()
1058 case a.Math, a.Svg:
1059 p.reconstructActiveFormattingElements()
1060 if p.tok.DataAtom == a.Math {
1061 adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
1062 } else {
1063 adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
1064 }
1065 adjustForeignAttributes(p.tok.Attr)
1066 p.addElement()
1067 p.top().Namespace = p.tok.Data
1068 if p.hasSelfClosingToken {
1069 p.oe.pop()
1070 p.acknowledgeSelfClosingTag()
1071 }
1072 return true
1073 case a.Frame:
1074 // TODO: remove this divergence from the HTML5 spec.
1075 if p.oe.contains(a.Template) {
1076 p.addElement()
1077 return true
1078 }
1079 case a.Caption, a.Col, a.Colgroup, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1080 // Ignore the token.
1081 default:
1082 p.reconstructActiveFormattingElements()
1083 p.addElement()
1084 }
1085 case EndTagToken:
1086 switch p.tok.DataAtom {
1087 case a.Body:
1088 if p.elementInScope(defaultScope, a.Body) {
1089 p.im = afterBodyIM
1090 }
1091 case a.Html:
1092 if p.elementInScope(defaultScope, a.Body) {
1093 p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
1094 return false
1095 }
1096 return true
1097 case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
1098 p.popUntil(defaultScope, p.tok.DataAtom)
1099 case a.Form:
1100 if p.oe.contains(a.Template) {
1101 if !p.oe.contains(a.Form) {
1102 // Ignore the token.
1103 return true
1104 }
1105 p.generateImpliedEndTags()
1106 if p.tok.DataAtom == a.Form {
1107 // Ignore the token.
1108 return true
1109 }
1110 p.popUntil(defaultScope, a.Form)
1111 } else {
1112 node := p.form
1113 p.form = nil
1114 i := p.indexOfElementInScope(defaultScope, a.Form)
1115 if node == nil || i == -1 || p.oe[i] != node {
1116 // Ignore the token.
1117 return true
1118 }
1119 p.generateImpliedEndTags()
1120 p.oe.remove(node)
1121 }
1122 case a.P:
1123 if !p.elementInScope(buttonScope, a.P) {
1124 p.parseImpliedToken(StartTagToken, a.P, a.P.String())
1125 }
1126 p.popUntil(buttonScope, a.P)
1127 case a.Li:
1128 p.popUntil(listItemScope, a.Li)
1129 case a.Dd, a.Dt:
1130 p.popUntil(defaultScope, p.tok.DataAtom)
1131 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
1132 p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
1133 case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1134 p.inBodyEndTagFormatting(p.tok.DataAtom)
1135 case a.Applet, a.Marquee, a.Object:
1136 if p.popUntil(defaultScope, p.tok.DataAtom) {
1137 p.clearActiveFormattingElements()
1138 }
1139 case a.Br:
1140 p.tok.Type = StartTagToken
1141 return false
1142 case a.Template:
1143 return inHeadIM(p)
1144 default:
1145 p.inBodyEndTagOther(p.tok.DataAtom)
1146 }
1147 case CommentToken:
1148 p.addChild(&Node{
1149 Type: CommentNode,
1150 Data: p.tok.Data,
1151 })
1152 case ErrorToken:
1153 // TODO: remove this divergence from the HTML5 spec.
1154 if len(p.templateStack) > 0 {
1155 p.im = inTemplateIM
1156 return false
1157 } else {
1158 for _, e := range p.oe {
1159 switch e.DataAtom {
1160 case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
1161 a.Thead, a.Tr, a.Body, a.Html:
1162 default:
1163 return true
1164 }
1165 }
1166 }
1167 }
1168
1169 return true
1170}
1171
// inBodyEndTagFormatting handles an end tag for a formatting element.
// tagAtom is the atom of the end tag being processed. When no matching entry
// is found in the list of active formatting elements, the tag is handed to
// inBodyEndTagOther instead.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-4. The outer loop.
	for i := 0; i < 8; i++ {
		// Step 5. Find the formatting element.
		// The search runs from the end of the list of active formatting
		// elements and stops at the first scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			p.inBodyEndTagOther(tagAtom)
			return
		}
		// The formatting element is active but no longer open: drop it from
		// the active list and stop.
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			p.afe.remove(formattingElement)
			return
		}
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Steps 9-10. Find the furthest block.
		// This is the first special element at or above feIndex in the stack
		// of open elements.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// No furthest block: pop up to and including the formatting
			// element, then remove it from the active list.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 11-12. Find the common ancestor and bookmark node.
		// NOTE(review): p.oe[feIndex-1] assumes the formatting element is never
		// the bottom entry of the stack of open elements - confirm.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 13. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Steps 13.1-13.2
		for j := 0; j < 3; j++ {
			// Step 13.3.
			x--
			node = p.oe[x]
			// Step 13.4 - 13.5.
			// Nodes that are not active formatting elements are removed from
			// the stack of open elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 13.6.
			if node == formattingElement {
				break
			}
			// Step 13.7.
			// Replace node with a clone in both the active list and the stack.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 13.8.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 13.9.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 13.10.
			lastNode = node
		}

		// Step 14. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		case a.Template:
			// TODO: remove namespace checking
			if commonAncestor.Namespace == "html" {
				commonAncestor = commonAncestor.LastChild
			}
			fallthrough
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 15-17. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 18. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 19. Fix up the stack of open elements.
		// The clone is placed immediately after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
1301
1302// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1303// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
1304// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1305func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
1306 for i := len(p.oe) - 1; i >= 0; i-- {
1307 if p.oe[i].DataAtom == tagAtom {
1308 p.oe = p.oe[:i]
1309 break
1310 }
1311 if isSpecialElement(p.oe[i]) {
1312 break
1313 }
1314 }
1315}
1316
1317// Section 12.2.6.4.8.
1318func textIM(p *parser) bool {
1319 switch p.tok.Type {
1320 case ErrorToken:
1321 p.oe.pop()
1322 case TextToken:
1323 d := p.tok.Data
1324 if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1325 // Ignore a newline at the start of a <textarea> block.
1326 if d != "" && d[0] == '\r' {
1327 d = d[1:]
1328 }
1329 if d != "" && d[0] == '\n' {
1330 d = d[1:]
1331 }
1332 }
1333 if d == "" {
1334 return true
1335 }
1336 p.addText(d)
1337 return true
1338 case EndTagToken:
1339 p.oe.pop()
1340 }
1341 p.im = p.originalIM
1342 p.originalIM = nil
1343 return p.tok.Type == EndTagToken
1344}
1345
1346// Section 12.2.6.4.9.
1347func inTableIM(p *parser) bool {
1348 switch p.tok.Type {
1349 case ErrorToken:
1350 // Stop parsing.
1351 return true
1352 case TextToken:
1353 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
1354 switch p.oe.top().DataAtom {
1355 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1356 if strings.Trim(p.tok.Data, whitespace) == "" {
1357 p.addText(p.tok.Data)
1358 return true
1359 }
1360 }
1361 case StartTagToken:
1362 switch p.tok.DataAtom {
1363 case a.Caption:
1364 p.clearStackToContext(tableScope)
1365 p.afe = append(p.afe, &scopeMarker)
1366 p.addElement()
1367 p.im = inCaptionIM
1368 return true
1369 case a.Colgroup:
1370 p.clearStackToContext(tableScope)
1371 p.addElement()
1372 p.im = inColumnGroupIM
1373 return true
1374 case a.Col:
1375 p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
1376 return false
1377 case a.Tbody, a.Tfoot, a.Thead:
1378 p.clearStackToContext(tableScope)
1379 p.addElement()
1380 p.im = inTableBodyIM
1381 return true
1382 case a.Td, a.Th, a.Tr:
1383 p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
1384 return false
1385 case a.Table:
1386 if p.popUntil(tableScope, a.Table) {
1387 p.resetInsertionMode()
1388 return false
1389 }
1390 // Ignore the token.
1391 return true
1392 case a.Style, a.Script, a.Template:
1393 return inHeadIM(p)
1394 case a.Input:
1395 for _, t := range p.tok.Attr {
1396 if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
1397 p.addElement()
1398 p.oe.pop()
1399 return true
1400 }
1401 }
1402 // Otherwise drop down to the default action.
1403 case a.Form:
1404 if p.oe.contains(a.Template) || p.form != nil {
1405 // Ignore the token.
1406 return true
1407 }
1408 p.addElement()
1409 p.form = p.oe.pop()
1410 case a.Select:
1411 p.reconstructActiveFormattingElements()
1412 switch p.top().DataAtom {
1413 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1414 p.fosterParenting = true
1415 }
1416 p.addElement()
1417 p.fosterParenting = false
1418 p.framesetOK = false
1419 p.im = inSelectInTableIM
1420 return true
1421 }
1422 case EndTagToken:
1423 switch p.tok.DataAtom {
1424 case a.Table:
1425 if p.popUntil(tableScope, a.Table) {
1426 p.resetInsertionMode()
1427 return true
1428 }
1429 // Ignore the token.
1430 return true
1431 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1432 // Ignore the token.
1433 return true
1434 case a.Template:
1435 return inHeadIM(p)
1436 }
1437 case CommentToken:
1438 p.addChild(&Node{
1439 Type: CommentNode,
1440 Data: p.tok.Data,
1441 })
1442 return true
1443 case DoctypeToken:
1444 // Ignore the token.
1445 return true
1446 }
1447
1448 p.fosterParenting = true
1449 defer func() { p.fosterParenting = false }()
1450
1451 return inBodyIM(p)
1452}
1453
1454// Section 12.2.6.4.11.
1455func inCaptionIM(p *parser) bool {
1456 switch p.tok.Type {
1457 case StartTagToken:
1458 switch p.tok.DataAtom {
1459 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1460 if p.popUntil(tableScope, a.Caption) {
1461 p.clearActiveFormattingElements()
1462 p.im = inTableIM
1463 return false
1464 } else {
1465 // Ignore the token.
1466 return true
1467 }
1468 case a.Select:
1469 p.reconstructActiveFormattingElements()
1470 p.addElement()
1471 p.framesetOK = false
1472 p.im = inSelectInTableIM
1473 return true
1474 }
1475 case EndTagToken:
1476 switch p.tok.DataAtom {
1477 case a.Caption:
1478 if p.popUntil(tableScope, a.Caption) {
1479 p.clearActiveFormattingElements()
1480 p.im = inTableIM
1481 }
1482 return true
1483 case a.Table:
1484 if p.popUntil(tableScope, a.Caption) {
1485 p.clearActiveFormattingElements()
1486 p.im = inTableIM
1487 return false
1488 } else {
1489 // Ignore the token.
1490 return true
1491 }
1492 case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1493 // Ignore the token.
1494 return true
1495 }
1496 }
1497 return inBodyIM(p)
1498}
1499
1500// Section 12.2.6.4.12.
1501func inColumnGroupIM(p *parser) bool {
1502 switch p.tok.Type {
1503 case TextToken:
1504 s := strings.TrimLeft(p.tok.Data, whitespace)
1505 if len(s) < len(p.tok.Data) {
1506 // Add the initial whitespace to the current node.
1507 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1508 if s == "" {
1509 return true
1510 }
1511 p.tok.Data = s
1512 }
1513 case CommentToken:
1514 p.addChild(&Node{
1515 Type: CommentNode,
1516 Data: p.tok.Data,
1517 })
1518 return true
1519 case DoctypeToken:
1520 // Ignore the token.
1521 return true
1522 case StartTagToken:
1523 switch p.tok.DataAtom {
1524 case a.Html:
1525 return inBodyIM(p)
1526 case a.Col:
1527 p.addElement()
1528 p.oe.pop()
1529 p.acknowledgeSelfClosingTag()
1530 return true
1531 case a.Template:
1532 return inHeadIM(p)
1533 }
1534 case EndTagToken:
1535 switch p.tok.DataAtom {
1536 case a.Colgroup:
1537 if p.oe.top().DataAtom == a.Colgroup {
1538 p.oe.pop()
1539 p.im = inTableIM
1540 }
1541 return true
1542 case a.Col:
1543 // Ignore the token.
1544 return true
1545 case a.Template:
1546 return inHeadIM(p)
1547 }
1548 }
1549 if p.oe.top().DataAtom != a.Colgroup {
1550 return true
1551 }
1552 p.oe.pop()
1553 p.im = inTableIM
1554 return false
1555}
1556
1557// Section 12.2.6.4.13.
1558func inTableBodyIM(p *parser) bool {
1559 switch p.tok.Type {
1560 case StartTagToken:
1561 switch p.tok.DataAtom {
1562 case a.Tr:
1563 p.clearStackToContext(tableBodyScope)
1564 p.addElement()
1565 p.im = inRowIM
1566 return true
1567 case a.Td, a.Th:
1568 p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1569 return false
1570 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1571 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1572 p.im = inTableIM
1573 return false
1574 }
1575 // Ignore the token.
1576 return true
1577 }
1578 case EndTagToken:
1579 switch p.tok.DataAtom {
1580 case a.Tbody, a.Tfoot, a.Thead:
1581 if p.elementInScope(tableScope, p.tok.DataAtom) {
1582 p.clearStackToContext(tableBodyScope)
1583 p.oe.pop()
1584 p.im = inTableIM
1585 }
1586 return true
1587 case a.Table:
1588 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1589 p.im = inTableIM
1590 return false
1591 }
1592 // Ignore the token.
1593 return true
1594 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1595 // Ignore the token.
1596 return true
1597 }
1598 case CommentToken:
1599 p.addChild(&Node{
1600 Type: CommentNode,
1601 Data: p.tok.Data,
1602 })
1603 return true
1604 }
1605
1606 return inTableIM(p)
1607}
1608
1609// Section 12.2.6.4.14.
1610func inRowIM(p *parser) bool {
1611 switch p.tok.Type {
1612 case StartTagToken:
1613 switch p.tok.DataAtom {
1614 case a.Td, a.Th:
1615 p.clearStackToContext(tableRowScope)
1616 p.addElement()
1617 p.afe = append(p.afe, &scopeMarker)
1618 p.im = inCellIM
1619 return true
1620 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1621 if p.popUntil(tableScope, a.Tr) {
1622 p.im = inTableBodyIM
1623 return false
1624 }
1625 // Ignore the token.
1626 return true
1627 }
1628 case EndTagToken:
1629 switch p.tok.DataAtom {
1630 case a.Tr:
1631 if p.popUntil(tableScope, a.Tr) {
1632 p.im = inTableBodyIM
1633 return true
1634 }
1635 // Ignore the token.
1636 return true
1637 case a.Table:
1638 if p.popUntil(tableScope, a.Tr) {
1639 p.im = inTableBodyIM
1640 return false
1641 }
1642 // Ignore the token.
1643 return true
1644 case a.Tbody, a.Tfoot, a.Thead:
1645 if p.elementInScope(tableScope, p.tok.DataAtom) {
1646 p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1647 return false
1648 }
1649 // Ignore the token.
1650 return true
1651 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1652 // Ignore the token.
1653 return true
1654 }
1655 }
1656
1657 return inTableIM(p)
1658}
1659
1660// Section 12.2.6.4.15.
1661func inCellIM(p *parser) bool {
1662 switch p.tok.Type {
1663 case StartTagToken:
1664 switch p.tok.DataAtom {
1665 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1666 if p.popUntil(tableScope, a.Td, a.Th) {
1667 // Close the cell and reprocess.
1668 p.clearActiveFormattingElements()
1669 p.im = inRowIM
1670 return false
1671 }
1672 // Ignore the token.
1673 return true
1674 case a.Select:
1675 p.reconstructActiveFormattingElements()
1676 p.addElement()
1677 p.framesetOK = false
1678 p.im = inSelectInTableIM
1679 return true
1680 }
1681 case EndTagToken:
1682 switch p.tok.DataAtom {
1683 case a.Td, a.Th:
1684 if !p.popUntil(tableScope, p.tok.DataAtom) {
1685 // Ignore the token.
1686 return true
1687 }
1688 p.clearActiveFormattingElements()
1689 p.im = inRowIM
1690 return true
1691 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1692 // Ignore the token.
1693 return true
1694 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1695 if !p.elementInScope(tableScope, p.tok.DataAtom) {
1696 // Ignore the token.
1697 return true
1698 }
1699 // Close the cell and reprocess.
1700 p.popUntil(tableScope, a.Td, a.Th)
1701 p.clearActiveFormattingElements()
1702 p.im = inRowIM
1703 return false
1704 }
1705 }
1706 return inBodyIM(p)
1707}
1708
1709// Section 12.2.6.4.16.
1710func inSelectIM(p *parser) bool {
1711 switch p.tok.Type {
1712 case ErrorToken:
1713 // Stop parsing.
1714 return true
1715 case TextToken:
1716 p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1717 case StartTagToken:
1718 switch p.tok.DataAtom {
1719 case a.Html:
1720 return inBodyIM(p)
1721 case a.Option:
1722 if p.top().DataAtom == a.Option {
1723 p.oe.pop()
1724 }
1725 p.addElement()
1726 case a.Optgroup:
1727 if p.top().DataAtom == a.Option {
1728 p.oe.pop()
1729 }
1730 if p.top().DataAtom == a.Optgroup {
1731 p.oe.pop()
1732 }
1733 p.addElement()
1734 case a.Select:
1735 p.tok.Type = EndTagToken
1736 return false
1737 case a.Input, a.Keygen, a.Textarea:
1738 if p.elementInScope(selectScope, a.Select) {
1739 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1740 return false
1741 }
1742 // In order to properly ignore <textarea>, we need to change the tokenizer mode.
1743 p.tokenizer.NextIsNotRawText()
1744 // Ignore the token.
1745 return true
1746 case a.Script, a.Template:
1747 return inHeadIM(p)
1748 }
1749 case EndTagToken:
1750 switch p.tok.DataAtom {
1751 case a.Option:
1752 if p.top().DataAtom == a.Option {
1753 p.oe.pop()
1754 }
1755 case a.Optgroup:
1756 i := len(p.oe) - 1
1757 if p.oe[i].DataAtom == a.Option {
1758 i--
1759 }
1760 if p.oe[i].DataAtom == a.Optgroup {
1761 p.oe = p.oe[:i]
1762 }
1763 case a.Select:
1764 if p.popUntil(selectScope, a.Select) {
1765 p.resetInsertionMode()
1766 }
1767 case a.Template:
1768 return inHeadIM(p)
1769 }
1770 case CommentToken:
1771 p.addChild(&Node{
1772 Type: CommentNode,
1773 Data: p.tok.Data,
1774 })
1775 case DoctypeToken:
1776 // Ignore the token.
1777 return true
1778 }
1779
1780 return true
1781}
1782
1783// Section 12.2.6.4.17.
1784func inSelectInTableIM(p *parser) bool {
1785 switch p.tok.Type {
1786 case StartTagToken, EndTagToken:
1787 switch p.tok.DataAtom {
1788 case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1789 if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
1790 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1791 return false
1792 } else {
1793 // Ignore the token.
1794 return true
1795 }
1796 }
1797 }
1798 return inSelectIM(p)
1799}
1800
1801// Section 12.2.6.4.18.
1802func inTemplateIM(p *parser) bool {
1803 switch p.tok.Type {
1804 case TextToken, CommentToken, DoctypeToken:
1805 return inBodyIM(p)
1806 case StartTagToken:
1807 switch p.tok.DataAtom {
1808 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
1809 return inHeadIM(p)
1810 case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1811 p.templateStack.pop()
1812 p.templateStack = append(p.templateStack, inTableIM)
1813 p.im = inTableIM
1814 return false
1815 case a.Col:
1816 p.templateStack.pop()
1817 p.templateStack = append(p.templateStack, inColumnGroupIM)
1818 p.im = inColumnGroupIM
1819 return false
1820 case a.Tr:
1821 p.templateStack.pop()
1822 p.templateStack = append(p.templateStack, inTableBodyIM)
1823 p.im = inTableBodyIM
1824 return false
1825 case a.Td, a.Th:
1826 p.templateStack.pop()
1827 p.templateStack = append(p.templateStack, inRowIM)
1828 p.im = inRowIM
1829 return false
1830 default:
1831 p.templateStack.pop()
1832 p.templateStack = append(p.templateStack, inBodyIM)
1833 p.im = inBodyIM
1834 return false
1835 }
1836 case EndTagToken:
1837 switch p.tok.DataAtom {
1838 case a.Template:
1839 return inHeadIM(p)
1840 default:
1841 // Ignore the token.
1842 return true
1843 }
1844 }
1845 if !p.oe.contains(a.Template) {
1846 // Ignore the token.
1847 return true
1848 }
1849 p.popUntil(defaultScope, a.Template)
1850 p.clearActiveFormattingElements()
1851 p.templateStack.pop()
1852 p.resetInsertionMode()
1853 return false
1854}
1855
1856// Section 12.2.6.4.19.
1857func afterBodyIM(p *parser) bool {
1858 switch p.tok.Type {
1859 case ErrorToken:
1860 // Stop parsing.
1861 return true
1862 case TextToken:
1863 s := strings.TrimLeft(p.tok.Data, whitespace)
1864 if len(s) == 0 {
1865 // It was all whitespace.
1866 return inBodyIM(p)
1867 }
1868 case StartTagToken:
1869 if p.tok.DataAtom == a.Html {
1870 return inBodyIM(p)
1871 }
1872 case EndTagToken:
1873 if p.tok.DataAtom == a.Html {
1874 if !p.fragment {
1875 p.im = afterAfterBodyIM
1876 }
1877 return true
1878 }
1879 case CommentToken:
1880 // The comment is attached to the <html> element.
1881 if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
1882 panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1883 }
1884 p.oe[0].AppendChild(&Node{
1885 Type: CommentNode,
1886 Data: p.tok.Data,
1887 })
1888 return true
1889 }
1890 p.im = inBodyIM
1891 return false
1892}
1893
1894// Section 12.2.6.4.20.
1895func inFramesetIM(p *parser) bool {
1896 switch p.tok.Type {
1897 case CommentToken:
1898 p.addChild(&Node{
1899 Type: CommentNode,
1900 Data: p.tok.Data,
1901 })
1902 case TextToken:
1903 // Ignore all text but whitespace.
1904 s := strings.Map(func(c rune) rune {
1905 switch c {
1906 case ' ', '\t', '\n', '\f', '\r':
1907 return c
1908 }
1909 return -1
1910 }, p.tok.Data)
1911 if s != "" {
1912 p.addText(s)
1913 }
1914 case StartTagToken:
1915 switch p.tok.DataAtom {
1916 case a.Html:
1917 return inBodyIM(p)
1918 case a.Frameset:
1919 p.addElement()
1920 case a.Frame:
1921 p.addElement()
1922 p.oe.pop()
1923 p.acknowledgeSelfClosingTag()
1924 case a.Noframes:
1925 return inHeadIM(p)
1926 case a.Template:
1927 // TODO: remove this divergence from the HTML5 spec.
1928 //
1929 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
1930 return inTemplateIM(p)
1931 }
1932 case EndTagToken:
1933 switch p.tok.DataAtom {
1934 case a.Frameset:
1935 if p.oe.top().DataAtom != a.Html {
1936 p.oe.pop()
1937 if p.oe.top().DataAtom != a.Frameset {
1938 p.im = afterFramesetIM
1939 return true
1940 }
1941 }
1942 }
1943 default:
1944 // Ignore the token.
1945 }
1946 return true
1947}
1948
1949// Section 12.2.6.4.21.
1950func afterFramesetIM(p *parser) bool {
1951 switch p.tok.Type {
1952 case CommentToken:
1953 p.addChild(&Node{
1954 Type: CommentNode,
1955 Data: p.tok.Data,
1956 })
1957 case TextToken:
1958 // Ignore all text but whitespace.
1959 s := strings.Map(func(c rune) rune {
1960 switch c {
1961 case ' ', '\t', '\n', '\f', '\r':
1962 return c
1963 }
1964 return -1
1965 }, p.tok.Data)
1966 if s != "" {
1967 p.addText(s)
1968 }
1969 case StartTagToken:
1970 switch p.tok.DataAtom {
1971 case a.Html:
1972 return inBodyIM(p)
1973 case a.Noframes:
1974 return inHeadIM(p)
1975 }
1976 case EndTagToken:
1977 switch p.tok.DataAtom {
1978 case a.Html:
1979 p.im = afterAfterFramesetIM
1980 return true
1981 }
1982 default:
1983 // Ignore the token.
1984 }
1985 return true
1986}
1987
1988// Section 12.2.6.4.22.
1989func afterAfterBodyIM(p *parser) bool {
1990 switch p.tok.Type {
1991 case ErrorToken:
1992 // Stop parsing.
1993 return true
1994 case TextToken:
1995 s := strings.TrimLeft(p.tok.Data, whitespace)
1996 if len(s) == 0 {
1997 // It was all whitespace.
1998 return inBodyIM(p)
1999 }
2000 case StartTagToken:
2001 if p.tok.DataAtom == a.Html {
2002 return inBodyIM(p)
2003 }
2004 case CommentToken:
2005 p.doc.AppendChild(&Node{
2006 Type: CommentNode,
2007 Data: p.tok.Data,
2008 })
2009 return true
2010 case DoctypeToken:
2011 return inBodyIM(p)
2012 }
2013 p.im = inBodyIM
2014 return false
2015}
2016
2017// Section 12.2.6.4.23.
2018func afterAfterFramesetIM(p *parser) bool {
2019 switch p.tok.Type {
2020 case CommentToken:
2021 p.doc.AppendChild(&Node{
2022 Type: CommentNode,
2023 Data: p.tok.Data,
2024 })
2025 case TextToken:
2026 // Ignore all text but whitespace.
2027 s := strings.Map(func(c rune) rune {
2028 switch c {
2029 case ' ', '\t', '\n', '\f', '\r':
2030 return c
2031 }
2032 return -1
2033 }, p.tok.Data)
2034 if s != "" {
2035 p.tok.Data = s
2036 return inBodyIM(p)
2037 }
2038 case StartTagToken:
2039 switch p.tok.DataAtom {
2040 case a.Html:
2041 return inBodyIM(p)
2042 case a.Noframes:
2043 return inHeadIM(p)
2044 }
2045 case DoctypeToken:
2046 return inBodyIM(p)
2047 default:
2048 // Ignore the token.
2049 }
2050 return true
2051}
2052
// whitespaceOrNUL is the whitespace character set extended with the NUL
// character. parseForeignContent trims with it when deciding whether a text
// token leaves framesetOK set.
const whitespaceOrNUL = whitespace + "\x00"
2054
2055// Section 12.2.6.5
2056func parseForeignContent(p *parser) bool {
2057 switch p.tok.Type {
2058 case TextToken:
2059 if p.framesetOK {
2060 p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
2061 }
2062 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
2063 p.addText(p.tok.Data)
2064 case CommentToken:
2065 p.addChild(&Node{
2066 Type: CommentNode,
2067 Data: p.tok.Data,
2068 })
2069 case StartTagToken:
2070 b := breakout[p.tok.Data]
2071 if p.tok.DataAtom == a.Font {
2072 loop:
2073 for _, attr := range p.tok.Attr {
2074 switch attr.Key {
2075 case "color", "face", "size":
2076 b = true
2077 break loop
2078 }
2079 }
2080 }
2081 if b {
2082 for i := len(p.oe) - 1; i >= 0; i-- {
2083 n := p.oe[i]
2084 if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
2085 p.oe = p.oe[:i+1]
2086 break
2087 }
2088 }
2089 return false
2090 }
2091 switch p.top().Namespace {
2092 case "math":
2093 adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
2094 case "svg":
2095 // Adjust SVG tag names. The tokenizer lower-cases tag names, but
2096 // SVG wants e.g. "foreignObject" with a capital second "O".
2097 if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
2098 p.tok.DataAtom = a.Lookup([]byte(x))
2099 p.tok.Data = x
2100 }
2101 adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
2102 default:
2103 panic("html: bad parser state: unexpected namespace")
2104 }
2105 adjustForeignAttributes(p.tok.Attr)
2106 namespace := p.top().Namespace
2107 p.addElement()
2108 p.top().Namespace = namespace
2109 if namespace != "" {
2110 // Don't let the tokenizer go into raw text mode in foreign content
2111 // (e.g. in an SVG <title> tag).
2112 p.tokenizer.NextIsNotRawText()
2113 }
2114 if p.hasSelfClosingToken {
2115 p.oe.pop()
2116 p.acknowledgeSelfClosingTag()
2117 }
2118 case EndTagToken:
2119 for i := len(p.oe) - 1; i >= 0; i-- {
2120 if p.oe[i].Namespace == "" {
2121 return p.im(p)
2122 }
2123 if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
2124 p.oe = p.oe[:i]
2125 break
2126 }
2127 }
2128 return true
2129 default:
2130 // Ignore the token.
2131 }
2132 return true
2133}
2134
2135// Section 12.2.6.
2136func (p *parser) inForeignContent() bool {
2137 if len(p.oe) == 0 {
2138 return false
2139 }
2140 n := p.oe[len(p.oe)-1]
2141 if n.Namespace == "" {
2142 return false
2143 }
2144 if mathMLTextIntegrationPoint(n) {
2145 if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
2146 return false
2147 }
2148 if p.tok.Type == TextToken {
2149 return false
2150 }
2151 }
2152 if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
2153 return false
2154 }
2155 if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
2156 return false
2157 }
2158 if p.tok.Type == ErrorToken {
2159 return false
2160 }
2161 return true
2162}
2163
2164// parseImpliedToken parses a token as though it had appeared in the parser's
2165// input.
2166func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
2167 realToken, selfClosing := p.tok, p.hasSelfClosingToken
2168 p.tok = Token{
2169 Type: t,
2170 DataAtom: dataAtom,
2171 Data: data,
2172 }
2173 p.hasSelfClosingToken = false
2174 p.parseCurrentToken()
2175 p.tok, p.hasSelfClosingToken = realToken, selfClosing
2176}
2177
2178// parseCurrentToken runs the current token through the parsing routines
2179// until it is consumed.
2180func (p *parser) parseCurrentToken() {
2181 if p.tok.Type == SelfClosingTagToken {
2182 p.hasSelfClosingToken = true
2183 p.tok.Type = StartTagToken
2184 }
2185
2186 consumed := false
2187 for !consumed {
2188 if p.inForeignContent() {
2189 consumed = parseForeignContent(p)
2190 } else {
2191 consumed = p.im(p)
2192 }
2193 }
2194
2195 if p.hasSelfClosingToken {
2196 // This is a parse error, but ignore it.
2197 p.hasSelfClosingToken = false
2198 }
2199}
2200
2201func (p *parser) parse() error {
2202 // Iterate until EOF. Any other error will cause an early return.
2203 var err error
2204 for err != io.EOF {
2205 // CDATA sections are allowed only in foreign content.
2206 n := p.oe.top()
2207 p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2208 // Read and parse the next token.
2209 p.tokenizer.Next()
2210 p.tok = p.tokenizer.Token()
2211 if p.tok.Type == ErrorToken {
2212 err = p.tokenizer.Err()
2213 if err != nil && err != io.EOF {
2214 return err
2215 }
2216 }
2217 p.parseCurrentToken()
2218 }
2219 return nil
2220}
2221
2222// Parse returns the parse tree for the HTML from the given Reader.
2223// The input is assumed to be UTF-8 encoded.
2224func Parse(r io.Reader) (*Node, error) {
2225 p := &parser{
2226 tokenizer: NewTokenizer(r),
2227 doc: &Node{
2228 Type: DocumentNode,
2229 },
2230 scripting: true,
2231 framesetOK: true,
2232 im: initialIM,
2233 }
2234 err := p.parse()
2235 if err != nil {
2236 return nil, err
2237 }
2238 return p.doc, nil
2239}
2240
2241// ParseFragment parses a fragment of HTML and returns the nodes that were
2242// found. If the fragment is the InnerHTML for an existing element, pass that
2243// element in context.
2244func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
2245 contextTag := ""
2246 if context != nil {
2247 if context.Type != ElementNode {
2248 return nil, errors.New("html: ParseFragment of non-element Node")
2249 }
2250 // The next check isn't just context.DataAtom.String() == context.Data because
2251 // it is valid to pass an element whose tag isn't a known atom. For example,
2252 // DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2253 if context.DataAtom != a.Lookup([]byte(context.Data)) {
2254 return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2255 }
2256 contextTag = context.DataAtom.String()
2257 }
2258 p := &parser{
2259 tokenizer: NewTokenizerFragment(r, contextTag),
2260 doc: &Node{
2261 Type: DocumentNode,
2262 },
2263 scripting: true,
2264 fragment: true,
2265 context: context,
2266 }
2267
2268 root := &Node{
2269 Type: ElementNode,
2270 DataAtom: a.Html,
2271 Data: a.Html.String(),
2272 }
2273 p.doc.AppendChild(root)
2274 p.oe = nodeStack{root}
2275 if context != nil && context.DataAtom == a.Template {
2276 p.templateStack = append(p.templateStack, inTemplateIM)
2277 }
2278 p.resetInsertionMode()
2279
2280 for n := context; n != nil; n = n.Parent {
2281 if n.Type == ElementNode && n.DataAtom == a.Form {
2282 p.form = n
2283 break
2284 }
2285 }
2286
2287 err := p.parse()
2288 if err != nil {
2289 return nil, err
2290 }
2291
2292 parent := p.doc
2293 if context != nil {
2294 parent = root
2295 }
2296
2297 var result []*Node
2298 for c := parent.FirstChild; c != nil; {
2299 next := c.NextSibling
2300 parent.RemoveChild(c)
2301 result = append(result, c)
2302 c = next
2303 }
2304 return result, nil
2305}