Package org.jsoup.parser
Class Tokeniser
- java.lang.Object
-
- org.jsoup.parser.Tokeniser
-
final class Tokeniser extends java.lang.Object
Reads the input stream into tokens.
-
-
Field Summary
Fields
Modifier and Type | Field | Description
(package private) Token.Character
charPending
private java.lang.StringBuilder
charsBuilder
private java.lang.String
charsString
private int
charStartPos
private int[]
codepointHolder
(package private) Token.Comment
commentPending
(package private) java.lang.StringBuilder
dataBuffer
(package private) Token.Doctype
doctypePending
private Token
emitPending
(package private) Token.EndTag
endPending
private ParseErrorList
errors
private boolean
isEmitPending
private java.lang.String
lastStartCloseSeq
private java.lang.String
lastStartTag
private int
markupStartPos
private int[]
multipointHolder
private static char[]
notCharRefCharsSorted
private CharacterReader
reader
(package private) static char
replacementChar
(package private) Token.StartTag
startPending
private TokeniserState
state
(package private) Token.Tag
tagPending
private static int
Unset
(package private) static int[]
win1252Extensions
(package private) static int
win1252ExtensionsStart
-
Constructor Summary
Constructors
Constructor | Description
Tokeniser(TreeBuilder treeBuilder)
-
Method Summary
All Methods | Static Methods | Instance Methods | Concrete Methods
Modifier and Type | Method | Description
(package private) void
advanceTransition(TokeniserState newState)
(package private) java.lang.String
appropriateEndTagName()
(package private) java.lang.String
appropriateEndTagSeq()
Returns the closer sequence "</lastStart".
private void
characterReferenceError(java.lang.String message, java.lang.Object... args)
(package private) int[]
consumeCharacterReference(java.lang.Character additionalAllowedCharacter, boolean inAttribute)
(package private) void
createBogusCommentPending()
(package private) void
createCommentPending()
(package private) void
createDoctypePending()
(package private) Token.Tag
createTagPending(boolean start)
(package private) void
createTempBuffer()
(package private) static boolean
currentNodeInHtmlNS()
(package private) void
emit(char c)
(package private) void
emit(char[] chars)
(package private) void
emit(int[] codepoints)
(package private) void
emit(java.lang.String str)
(package private) void
emit(java.lang.StringBuilder str)
(package private) void
emit(Token token)
(package private) void
emitCommentPending()
(package private) void
emitDoctypePending()
(package private) void
emitTagPending()
(package private) void
eofError(TokeniserState state)
(package private) void
error(java.lang.String errorMsg)
(package private) void
error(java.lang.String errorMsg, java.lang.Object... args)
(package private) void
error(TokeniserState state)
(package private) TokeniserState
getState()
(package private) boolean
isAppropriateEndTagToken()
(package private) Token
read()
(package private) void
transition(TokeniserState newState)
(package private) java.lang.String
unescapeEntities(boolean inAttribute)
Utility method to consume reader and unescape entities found within.
-
-
-
Field Detail
-
replacementChar
static final char replacementChar
- See Also:
- Constant Field Values
-
notCharRefCharsSorted
private static final char[] notCharRefCharsSorted
-
win1252ExtensionsStart
static final int win1252ExtensionsStart
- See Also:
- Constant Field Values
-
win1252Extensions
static final int[] win1252Extensions
-
reader
private final CharacterReader reader
-
errors
private final ParseErrorList errors
-
state
private TokeniserState state
-
emitPending
private Token emitPending
-
isEmitPending
private boolean isEmitPending
-
charsString
private java.lang.String charsString
-
charsBuilder
private final java.lang.StringBuilder charsBuilder
-
dataBuffer
final java.lang.StringBuilder dataBuffer
-
startPending
final Token.StartTag startPending
-
endPending
final Token.EndTag endPending
-
tagPending
Token.Tag tagPending
-
charPending
final Token.Character charPending
-
doctypePending
final Token.Doctype doctypePending
-
commentPending
final Token.Comment commentPending
-
lastStartTag
private java.lang.String lastStartTag
-
lastStartCloseSeq
private java.lang.String lastStartCloseSeq
-
Unset
private static final int Unset
- See Also:
- Constant Field Values
-
markupStartPos
private int markupStartPos
-
charStartPos
private int charStartPos
-
codepointHolder
private final int[] codepointHolder
-
multipointHolder
private final int[] multipointHolder
-
-
Constructor Detail
-
Tokeniser
Tokeniser(TreeBuilder treeBuilder)
-
-
Method Detail
-
read
Token read()
-
emit
void emit(Token token)
-
emit
void emit(java.lang.String str)
-
emit
void emit(java.lang.StringBuilder str)
-
emit
void emit(char c)
-
emit
void emit(char[] chars)
-
emit
void emit(int[] codepoints)
-
getState
TokeniserState getState()
-
transition
void transition(TokeniserState newState)
-
advanceTransition
void advanceTransition(TokeniserState newState)
-
consumeCharacterReference
int[] consumeCharacterReference(java.lang.Character additionalAllowedCharacter, boolean inAttribute)
-
createTagPending
Token.Tag createTagPending(boolean start)
-
emitTagPending
void emitTagPending()
-
createCommentPending
void createCommentPending()
-
emitCommentPending
void emitCommentPending()
-
createBogusCommentPending
void createBogusCommentPending()
-
createDoctypePending
void createDoctypePending()
-
emitDoctypePending
void emitDoctypePending()
-
createTempBuffer
void createTempBuffer()
-
isAppropriateEndTagToken
boolean isAppropriateEndTagToken()
-
appropriateEndTagName
java.lang.String appropriateEndTagName()
-
appropriateEndTagSeq
java.lang.String appropriateEndTagSeq()
Returns the closer sequence "</lastStart".
-
error
void error(TokeniserState state)
-
eofError
void eofError(TokeniserState state)
-
characterReferenceError
private void characterReferenceError(java.lang.String message, java.lang.Object... args)
-
error
void error(java.lang.String errorMsg)
-
error
void error(java.lang.String errorMsg, java.lang.Object... args)
-
currentNodeInHtmlNS
static boolean currentNodeInHtmlNS()
-
unescapeEntities
java.lang.String unescapeEntities(boolean inAttribute)
Utility method to consume reader and unescape entities found within.
- Parameters:
- inAttribute - if the text to be unescaped is in an attribute
- Returns:
- unescaped string from reader
-
-