Package org.jsoup.parser
Class HtmlTreeBuilder
- java.lang.Object
-
- org.jsoup.parser.TreeBuilder
-
- org.jsoup.parser.HtmlTreeBuilder
-
public class HtmlTreeBuilder extends TreeBuilder
HTML Tree Builder; creates a DOM from Tokens.
-
-
Field Summary
Fields
Modifier and Type | Field | Description
private boolean
baseUriSetFromDoc
private Element
contextElement
private Token.EndTag
emptyEnd
private java.util.ArrayList<Element>
formattingElements
private FormElement
formElement
private boolean
fosterInserts
private boolean
fragmentParsing
private boolean
framesetOk
private Element
headElement
private static int
maxQueueDepth
static int
MaxScopeSearchDepth
private static int
maxUsedFormattingElements
private HtmlTreeBuilderState
originalState
private java.util.List<Token.Character>
pendingTableCharacters
private java.lang.String[]
specificScopeTarget
private HtmlTreeBuilderState
state
(package private) static java.lang.String[]
TagMathMlTextIntegration
(package private) static java.lang.String[]
TagSearchButton
(package private) static java.lang.String[]
TagSearchEndTags
(package private) static java.lang.String[]
TagSearchList
(package private) static java.lang.String[]
TagSearchSelectScope
(package private) static java.lang.String[]
TagSearchSpecial
(package private) static java.lang.String[]
TagSearchTableScope
(package private) static java.lang.String[]
TagsSearchInScope
(package private) static java.lang.String[]
TagSvgHtmlIntegration
(package private) static java.lang.String[]
TagThoroughSearchEndTags
private java.util.ArrayList<HtmlTreeBuilderState>
tmplInsertMode
-
Fields inherited from class org.jsoup.parser.TreeBuilder
baseUri, currentToken, doc, parser, reader, seenTags, settings, stack, tokeniser, trackSourceRange
-
-
Constructor Summary
Constructors Constructor Description HtmlTreeBuilder()
-
Method Summary
All Methods | Static Methods | Instance Methods | Concrete Methods
Modifier and Type | Method | Description
(package private) Element
aboveOnStack(Element el)
(package private) void
addPendingTableCharacters(Token.Character c)
(package private) void
checkActiveFormattingElements(Element in)
(package private) void
clearFormattingElementsToLastMarker()
private void
clearStackToContext(java.lang.String... nodeNames)
Removes elements from the stack until one of the supplied HTML elements is removed.
(package private) void
clearStackToTableBodyContext()
(package private) void
clearStackToTableContext()
(package private) void
clearStackToTableRowContext()
(package private) void
closeElement(java.lang.String name)
(package private) Element
createElementFor(Token.StartTag startTag, java.lang.String namespace, boolean forcePreserveCase)
(package private) HtmlTreeBuilderState
currentTemplateMode()
(package private) ParseSettings
defaultSettings()
private void
doInsertElement(Element el, Token token)
Inserts the Element onto the stack.
(package private) void
error(HtmlTreeBuilderState state)
(package private) boolean
framesetOk()
(package private) void
framesetOk(boolean framesetOk)
(package private) void
generateImpliedEndTags()
(package private) void
generateImpliedEndTags(boolean thorough)
Pops HTML elements off the stack according to the implied end tag rules.
(package private) void
generateImpliedEndTags(java.lang.String excludeTag)
13.2.6.3 Closing elements that have implied end tags: When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, or an rtc element, the UA must pop the current node off the stack of open elements.
(package private) Element
getActiveFormattingElement(java.lang.String nodeName)
(package private) java.lang.String
getBaseUri()
(package private) Document
getDocument()
(package private) FormElement
getFormElement()
(package private) Element
getFromStack(java.lang.String elName)
Gets the nearest (lowest) HTML element with the given name from the stack.
(package private) Element
getHeadElement()
(package private) java.util.List<Token.Character>
getPendingTableCharacters()
(package private) java.util.ArrayList<Element>
getStack()
(package private) boolean
inButtonScope(java.lang.String targetName)
protected void
initialiseParse(java.io.Reader input, java.lang.String baseUri, Parser parser)
(package private) boolean
inListItemScope(java.lang.String targetName)
(package private) boolean
inScope(java.lang.String targetName)
(package private) boolean
inScope(java.lang.String[] targetNames)
(package private) boolean
inScope(java.lang.String targetName, java.lang.String[] extras)
(package private) boolean
inSelectScope(java.lang.String targetName)
(package private) void
insertCharacterNode(Token.Character characterToken)
Inserts the provided character token into the current element.
(package private) void
insertCharacterToElement(Token.Character characterToken, Element el)
Inserts the provided character token into the provided element.
(package private) void
insertCommentNode(Token.Comment token)
(package private) Element
insertElementFor(Token.StartTag startTag)
Inserts an HTML element for the given tag.
(package private) Element
insertEmptyElementFor(Token.StartTag startTag)
(package private) Element
insertForeignElementFor(Token.StartTag startTag, java.lang.String namespace)
Inserts a foreign element.
(package private) FormElement
insertFormElement(Token.StartTag startTag, boolean onStack, boolean checkTemplateStack)
(package private) void
insertInFosterParent(Node in)
(package private) void
insertMarkerToFormattingElements()
(package private) void
insertOnStackAfter(Element after, Element in)
private boolean
inSpecificScope(java.lang.String[] targetNames, java.lang.String[] baseTypes, java.lang.String[] extraTypes)
private boolean
inSpecificScope(java.lang.String targetName, java.lang.String[] baseTypes, java.lang.String[] extraTypes)
(package private) boolean
inTableScope(java.lang.String targetName)
protected boolean
isContentForTagData(java.lang.String normalName)
An internal method, visible for Element.
(package private) boolean
isFosterInserts()
(package private) boolean
isFragmentParsing()
(package private) static boolean
isHtmlIntegration(Element el)
(package private) boolean
isInActiveFormattingElements(Element el)
(package private) static boolean
isMathmlTextIntegration(Element el)
private static boolean
isSameFormattingElement(Element a, Element b)
(package private) static boolean
isSpecial(Element el)
(package private) Element
lastFormattingElement()
(package private) void
markInsertionMode()
(package private) void
maybeSetBaseUri(Element base)
(package private) HtmlTreeBuilder
newInstance()
Create a new copy of this TreeBuilder.
(package private) boolean
onStack(java.lang.String elName)
Checks if there is an HTML element with the given name on the stack.
private static boolean
onStack(java.util.ArrayList<Element> queue, Element element)
(package private) boolean
onStack(Element el)
(package private) boolean
onStackNot(java.lang.String[] allowedTags)
Tests if there is some element on the stack that is not in the provided set.
(package private) HtmlTreeBuilderState
originalState()
(package private) java.util.List<Node>
parseFragment(java.lang.String inputFragment, Element context, java.lang.String baseUri, Parser parser)
(package private) Element
popStackToClose(java.lang.String elName)
Pops the stack until the given HTML element is removed.
(package private) void
popStackToClose(java.lang.String... elNames)
Pops the stack until one of the given HTML elements is removed.
(package private) Element
popStackToCloseAnyNamespace(java.lang.String elName)
Pops the stack until an element with the supplied name is removed, irrespective of namespace.
(package private) HtmlTreeBuilderState
popTemplateMode()
(package private) int
positionOfElement(Element el)
protected boolean
process(Token token)
(package private) boolean
process(Token token, HtmlTreeBuilderState state)
(package private) void
pushActiveFormattingElements(Element in)
(package private) void
pushTemplateMode(HtmlTreeBuilderState state)
(package private) void
pushWithBookmark(Element in, int bookmark)
(package private) void
reconstructFormattingElements()
(package private) void
removeFromActiveFormattingElements(Element el)
(package private) boolean
removeFromStack(Element el)
(package private) Element
removeLastFormattingElement()
(package private) void
replaceActiveFormattingElement(Element out, Element in)
private static void
replaceInQueue(java.util.ArrayList<Element> queue, Element out, Element in)
(package private) void
replaceOnStack(Element out, Element in)
(package private) void
resetBody()
Places the body back onto the stack and moves to InBody, for cases in AfterBody / AfterAfterBody when more content comes.
(package private) boolean
resetInsertionMode()
Reset the insertion mode, by searching up the stack for an appropriate insertion mode.
(package private) void
resetPendingTableCharacters()
(package private) void
setFormElement(FormElement formElement)
(package private) void
setFosterInserts(boolean fosterInserts)
(package private) void
setHeadElement(Element headElement)
(package private) HtmlTreeBuilderState
state()
(package private) int
templateModeSize()
java.lang.String
toString()
(package private) void
transition(HtmlTreeBuilderState state)
(package private) boolean
useCurrentOrForeignInsert(Token token)
-
Methods inherited from class org.jsoup.parser.TreeBuilder
currentElement, currentElementIs, currentElementIs, defaultNamespace, error, error, onNodeClosed, onNodeInserted, parse, pop, processEndTag, processStartTag, processStartTag, push, runParser, tagFor, tagFor
-
-
-
-
Field Detail
-
TagsSearchInScope
static final java.lang.String[] TagsSearchInScope
-
TagSearchList
static final java.lang.String[] TagSearchList
-
TagSearchButton
static final java.lang.String[] TagSearchButton
-
TagSearchTableScope
static final java.lang.String[] TagSearchTableScope
-
TagSearchSelectScope
static final java.lang.String[] TagSearchSelectScope
-
TagSearchEndTags
static final java.lang.String[] TagSearchEndTags
-
TagThoroughSearchEndTags
static final java.lang.String[] TagThoroughSearchEndTags
-
TagSearchSpecial
static final java.lang.String[] TagSearchSpecial
-
TagMathMlTextIntegration
static final java.lang.String[] TagMathMlTextIntegration
-
TagSvgHtmlIntegration
static final java.lang.String[] TagSvgHtmlIntegration
-
MaxScopeSearchDepth
public static final int MaxScopeSearchDepth
- See Also:
- Constant Field Values
-
state
private HtmlTreeBuilderState state
-
originalState
private HtmlTreeBuilderState originalState
-
baseUriSetFromDoc
private boolean baseUriSetFromDoc
-
headElement
private Element headElement
-
formElement
private FormElement formElement
-
contextElement
private Element contextElement
-
formattingElements
private java.util.ArrayList<Element> formattingElements
-
tmplInsertMode
private java.util.ArrayList<HtmlTreeBuilderState> tmplInsertMode
-
pendingTableCharacters
private java.util.List<Token.Character> pendingTableCharacters
-
emptyEnd
private Token.EndTag emptyEnd
-
framesetOk
private boolean framesetOk
-
fosterInserts
private boolean fosterInserts
-
fragmentParsing
private boolean fragmentParsing
-
maxQueueDepth
private static final int maxQueueDepth
- See Also:
- Constant Field Values
-
specificScopeTarget
private final java.lang.String[] specificScopeTarget
-
maxUsedFormattingElements
private static final int maxUsedFormattingElements
- See Also:
- Constant Field Values
-
-
Method Detail
-
defaultSettings
ParseSettings defaultSettings()
- Specified by:
defaultSettings
in class TreeBuilder
-
newInstance
HtmlTreeBuilder newInstance()
Description copied from class: TreeBuilder
Create a new copy of this TreeBuilder.
- Specified by:
newInstance
in class TreeBuilder
- Returns:
- copy, ready for a new parse
-
initialiseParse
protected void initialiseParse(java.io.Reader input, java.lang.String baseUri, Parser parser)
- Overrides:
initialiseParse
in class TreeBuilder
-
parseFragment
java.util.List<Node> parseFragment(java.lang.String inputFragment, Element context, java.lang.String baseUri, Parser parser)
- Specified by:
parseFragment
in class TreeBuilder
-
process
protected boolean process(Token token)
- Specified by:
process
in class TreeBuilder
-
useCurrentOrForeignInsert
boolean useCurrentOrForeignInsert(Token token)
-
isMathmlTextIntegration
static boolean isMathmlTextIntegration(Element el)
-
isHtmlIntegration
static boolean isHtmlIntegration(Element el)
-
process
boolean process(Token token, HtmlTreeBuilderState state)
-
transition
void transition(HtmlTreeBuilderState state)
-
state
HtmlTreeBuilderState state()
-
markInsertionMode
void markInsertionMode()
-
originalState
HtmlTreeBuilderState originalState()
-
framesetOk
void framesetOk(boolean framesetOk)
-
framesetOk
boolean framesetOk()
-
getDocument
Document getDocument()
-
getBaseUri
java.lang.String getBaseUri()
-
maybeSetBaseUri
void maybeSetBaseUri(Element base)
-
isFragmentParsing
boolean isFragmentParsing()
-
error
void error(HtmlTreeBuilderState state)
-
createElementFor
Element createElementFor(Token.StartTag startTag, java.lang.String namespace, boolean forcePreserveCase)
-
insertElementFor
Element insertElementFor(Token.StartTag startTag)
Inserts an HTML element for the given tag.
-
insertForeignElementFor
Element insertForeignElementFor(Token.StartTag startTag, java.lang.String namespace)
Inserts a foreign element. Preserves the case of the tag name and of the attributes.
-
insertEmptyElementFor
Element insertEmptyElementFor(Token.StartTag startTag)
-
insertFormElement
FormElement insertFormElement(Token.StartTag startTag, boolean onStack, boolean checkTemplateStack)
-
doInsertElement
private void doInsertElement(Element el, Token token)
Inserts the Element onto the stack. All element inserts must run through this method. Performs any general tests on the Element before insertion.
- Parameters:
el
- the Element to insert and make the current element
token
- the token this element was parsed from. If null, uses a zero-width current token as intrinsic insert
-
insertCommentNode
void insertCommentNode(Token.Comment token)
-
insertCharacterNode
void insertCharacterNode(Token.Character characterToken)
Inserts the provided character token into the current element.
-
insertCharacterToElement
void insertCharacterToElement(Token.Character characterToken, Element el)
Inserts the provided character token into the provided element.
-
getStack
java.util.ArrayList<Element> getStack()
-
onStack
boolean onStack(Element el)
-
onStack
boolean onStack(java.lang.String elName)
Checks if there is an HTML element with the given name on the stack.
-
getFromStack
Element getFromStack(java.lang.String elName)
Gets the nearest (lowest) HTML element with the given name from the stack.
-
removeFromStack
boolean removeFromStack(Element el)
-
popStackToClose
Element popStackToClose(java.lang.String elName)
Pops the stack until the given HTML element is removed.
-
popStackToCloseAnyNamespace
Element popStackToCloseAnyNamespace(java.lang.String elName)
Pops the stack until an element with the supplied name is removed, irrespective of namespace.
-
popStackToClose
void popStackToClose(java.lang.String... elNames)
Pops the stack until one of the given HTML elements is removed.
-
clearStackToTableContext
void clearStackToTableContext()
-
clearStackToTableBodyContext
void clearStackToTableBodyContext()
-
clearStackToTableRowContext
void clearStackToTableRowContext()
-
clearStackToContext
private void clearStackToContext(java.lang.String... nodeNames)
Removes elements from the stack until one of the supplied HTML elements is removed.
-
replaceInQueue
private static void replaceInQueue(java.util.ArrayList<Element> queue, Element out, Element in)
-
resetInsertionMode
boolean resetInsertionMode()
Reset the insertion mode, by searching up the stack for an appropriate insertion mode. The stack search depth is limited to maxQueueDepth.
- Returns:
- true if the insertion mode was actually changed.
-
resetBody
void resetBody()
Places the body back onto the stack and moves to InBody, for cases in AfterBody / AfterAfterBody when more content comes
-
inSpecificScope
private boolean inSpecificScope(java.lang.String targetName, java.lang.String[] baseTypes, java.lang.String[] extraTypes)
-
inSpecificScope
private boolean inSpecificScope(java.lang.String[] targetNames, java.lang.String[] baseTypes, java.lang.String[] extraTypes)
-
inScope
boolean inScope(java.lang.String[] targetNames)
-
inScope
boolean inScope(java.lang.String targetName)
-
inScope
boolean inScope(java.lang.String targetName, java.lang.String[] extras)
-
inListItemScope
boolean inListItemScope(java.lang.String targetName)
-
inButtonScope
boolean inButtonScope(java.lang.String targetName)
-
inTableScope
boolean inTableScope(java.lang.String targetName)
-
inSelectScope
boolean inSelectScope(java.lang.String targetName)
-
onStackNot
boolean onStackNot(java.lang.String[] allowedTags)
Tests if there is some element on the stack that is not in the provided set.
-
setHeadElement
void setHeadElement(Element headElement)
-
getHeadElement
Element getHeadElement()
-
isFosterInserts
boolean isFosterInserts()
-
setFosterInserts
void setFosterInserts(boolean fosterInserts)
-
getFormElement
FormElement getFormElement()
-
setFormElement
void setFormElement(FormElement formElement)
-
resetPendingTableCharacters
void resetPendingTableCharacters()
-
getPendingTableCharacters
java.util.List<Token.Character> getPendingTableCharacters()
-
addPendingTableCharacters
void addPendingTableCharacters(Token.Character c)
-
generateImpliedEndTags
void generateImpliedEndTags(java.lang.String excludeTag)
13.2.6.3 Closing elements that have implied end tags: When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, or an rtc element, the UA must pop the current node off the stack of open elements. If a step requires the UA to generate implied end tags but lists an element to exclude from the process, then the UA must perform the above steps as if that element was not in the above list. When the steps below require the UA to generate all implied end tags thoroughly, then, while the current node is a caption element, a colgroup element, a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, an rtc element, a tbody element, a td element, a tfoot element, a th element, a thead element, or a tr element, the UA must pop the current node off the stack of open elements.
- Parameters:
excludeTag
- If a step requires the UA to generate implied end tags but lists an element to exclude from the process, then the UA must perform the above steps as if that element was not in the above list.
-
generateImpliedEndTags
void generateImpliedEndTags()
-
generateImpliedEndTags
void generateImpliedEndTags(boolean thorough)
Pops HTML elements off the stack according to the implied end tag rules.
- Parameters:
thorough
- if we are thorough (includes table elements etc) or not
-
closeElement
void closeElement(java.lang.String name)
-
isSpecial
static boolean isSpecial(Element el)
-
lastFormattingElement
Element lastFormattingElement()
-
positionOfElement
int positionOfElement(Element el)
-
removeLastFormattingElement
Element removeLastFormattingElement()
-
pushActiveFormattingElements
void pushActiveFormattingElements(Element in)
-
pushWithBookmark
void pushWithBookmark(Element in, int bookmark)
-
checkActiveFormattingElements
void checkActiveFormattingElements(Element in)
-
reconstructFormattingElements
void reconstructFormattingElements()
-
clearFormattingElementsToLastMarker
void clearFormattingElementsToLastMarker()
-
removeFromActiveFormattingElements
void removeFromActiveFormattingElements(Element el)
-
isInActiveFormattingElements
boolean isInActiveFormattingElements(Element el)
-
getActiveFormattingElement
Element getActiveFormattingElement(java.lang.String nodeName)
-
insertMarkerToFormattingElements
void insertMarkerToFormattingElements()
-
insertInFosterParent
void insertInFosterParent(Node in)
-
pushTemplateMode
void pushTemplateMode(HtmlTreeBuilderState state)
-
popTemplateMode
HtmlTreeBuilderState popTemplateMode()
-
templateModeSize
int templateModeSize()
-
currentTemplateMode
HtmlTreeBuilderState currentTemplateMode()
-
toString
public java.lang.String toString()
- Overrides:
toString
in class java.lang.Object
-
isContentForTagData
protected boolean isContentForTagData(java.lang.String normalName)
Description copied from class: TreeBuilder
(An internal method, visible for Element. For HTML parse, signals that script and style text should be treated as Data Nodes).
- Overrides:
isContentForTagData
in class TreeBuilder
-
-