Class HtmlNode
- Namespace
- HtmlAgilityPack
- Assembly
- HtmlAgilityPack.dll
Represents an HTML node.
public class HtmlNode : IXPathNavigable
- Inheritance
-
HtmlNode
- Implements
-
IXPathNavigable
- Derived
- Inherited Members
Constructors
HtmlNode(HtmlNodeType, HtmlDocument, int)
Initializes HtmlNode, providing type, owner and where it exists in a collection
public HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
Parameters
type
HtmlNodeType
ownerdocument
HtmlDocument
index
int
Fields
ElementsFlags
Gets a collection of flags that define specific behaviors for specific element nodes. The table contains a DictionaryEntry list with the lowercase tag name as the Key, and a combination of HtmlElementFlags as the Value.
public static Dictionary<string, HtmlElementFlag> ElementsFlags
Field Value
HtmlNodeTypeNameComment
Gets the name of a comment node. It is actually defined as '#comment'.
public static readonly string HtmlNodeTypeNameComment
Field Value
HtmlNodeTypeNameDocument
Gets the name of the document node. It is actually defined as '#document'.
public static readonly string HtmlNodeTypeNameDocument
Field Value
HtmlNodeTypeNameText
Gets the name of a text node. It is actually defined as '#text'.
public static readonly string HtmlNodeTypeNameText
Field Value
Properties
Attributes
Gets the collection of HTML attributes for this node. May not be null.
public HtmlAttributeCollection Attributes { get; }
Property Value
ChildNodes
Gets all the children of the node.
public HtmlNodeCollection ChildNodes { get; }
Property Value
Closed
Gets a value indicating if this node has been closed or not.
public bool Closed { get; }
Property Value
ClosingAttributes
Gets the collection of HTML attributes for the closing tag. May not be null.
public HtmlAttributeCollection ClosingAttributes { get; }
Property Value
Depth
The depth of the node relative to the opening root html element. This value is used to determine if a document has too many nested html nodes, which can cause stack overflows.
public int Depth { get; set; }
Property Value
EndNode
Gets the closing tag of the node, null if the node is self-closing.
public HtmlNode EndNode { get; }
Property Value
FirstChild
Gets the first child of the node.
public HtmlNode FirstChild { get; }
Property Value
HasAttributes
Gets a value indicating whether the current node has any attributes.
public bool HasAttributes { get; }
Property Value
HasChildNodes
Gets a value indicating whether this node has any child nodes.
public bool HasChildNodes { get; }
Property Value
HasClosingAttributes
Gets a value indicating whether the current node has any attributes on the closing tag.
public bool HasClosingAttributes { get; }
Property Value
Id
Gets or sets the value of the 'id' HTML attribute. The document must have been parsed using the OptionUseIdAttribute set to true.
public string Id { get; set; }
Property Value
InnerHtml
Gets or Sets the HTML between the start and end tags of the object.
public virtual string InnerHtml { get; set; }
Property Value
InnerLength
Gets the length of the area between the opening and closing tag of the node.
public int InnerLength { get; }
Property Value
InnerStartIndex
Gets the stream position of the area between the opening and closing tag of the node, relative to the start of the document.
public int InnerStartIndex { get; }
Property Value
InnerText
Gets the text between the start and end tags of the object.
public virtual string InnerText { get; }
Property Value
LastChild
Gets the last child of the node.
public HtmlNode LastChild { get; }
Property Value
Line
Gets the line number of this node in the document.
public int Line { get; }
Property Value
LinePosition
Gets the column number of this node in the document.
public int LinePosition { get; }
Property Value
Name
Gets or sets this node's name.
public string Name { get; set; }
Property Value
NextSibling
Gets the HTML node immediately following this element.
public HtmlNode NextSibling { get; }
Property Value
NodeType
Gets the type of this node.
public HtmlNodeType NodeType { get; }
Property Value
OriginalName
The original unaltered name of the tag
public string OriginalName { get; }
Property Value
OuterHtml
Gets the object and its content in HTML.
public virtual string OuterHtml { get; }
Property Value
OuterLength
Gets the length of the entire node, opening and closing tag included.
public int OuterLength { get; }
Property Value
OuterStartIndex
Gets the stream position of the beginning of the tag, relative to the start of the document.
public int OuterStartIndex { get; }
Property Value
OwnerDocument
Gets the HtmlDocument to which this node belongs.
public HtmlDocument OwnerDocument { get; }
Property Value
ParentNode
Gets the parent of this node (for nodes that can have parents).
public HtmlNode ParentNode { get; }
Property Value
PreviousSibling
Gets the node immediately preceding this node.
public HtmlNode PreviousSibling { get; }
Property Value
StreamPosition
Gets the stream position of this node in the document, relative to the start of the document.
public int StreamPosition { get; }
Property Value
XPath
Gets a valid XPath string that points to this node
public string XPath { get; }
Property Value
Methods
AddClass(string)
Adds one or more classes to this node.
public void AddClass(string name)
Parameters
name
stringThe class name(s) to add. May not be null.
AddClass(string, bool)
Adds one or more classes to this node.
public void AddClass(string name, bool throwError)
Parameters
name
stringThe class name(s) to add. May not be null.
throwError
booltrue to throw Error if class name exists, false otherwise.
Ancestors()
Returns a collection of all ancestor nodes of this element.
public IEnumerable<HtmlNode> Ancestors()
Returns
Ancestors(string)
Get Ancestors with matching name
public IEnumerable<HtmlNode> Ancestors(string name)
Parameters
name
string
Returns
AncestorsAndSelf()
Returns a collection containing this element and all of its ancestor nodes.
public IEnumerable<HtmlNode> AncestorsAndSelf()
Returns
AncestorsAndSelf(string)
Gets the current node and all ancestor nodes with matching name
public IEnumerable<HtmlNode> AncestorsAndSelf(string name)
Parameters
name
string
Returns
AppendChild(HtmlNode)
Adds the specified node to the end of the list of children of this node.
public HtmlNode AppendChild(HtmlNode newChild)
Parameters
newChild
HtmlNodeThe node to add. May not be null.
Returns
- HtmlNode
The node added.
AppendChildren(HtmlNodeCollection)
Adds the specified node list to the end of the list of children of this node.
public void AppendChildren(HtmlNodeCollection newChildren)
Parameters
newChildren
HtmlNodeCollectionThe node list to add. May not be null.
CanOverlapElement(string)
Determines if an element node can be kept overlapped.
public static bool CanOverlapElement(string name)
Parameters
name
stringThe name of the element node to check. May not be null.
Returns
- bool
true if the name is the name of an element node that can be kept overlapped, false otherwise.
ChildAttributes(string)
Gets all Attributes with name
public IEnumerable<HtmlAttribute> ChildAttributes(string name)
Parameters
name
string
Returns
Clone()
Creates a duplicate of the node
public HtmlNode Clone()
Returns
CloneNode(bool)
Creates a duplicate of the node.
public HtmlNode CloneNode(bool deep)
Parameters
deep
booltrue to recursively clone the subtree under the specified node; false to clone only the node itself.
Returns
- HtmlNode
The cloned node.
CloneNode(string)
Creates a duplicate of the node and changes its name at the same time.
public HtmlNode CloneNode(string newName)
Parameters
newName
stringThe new name of the cloned node. May not be null.
Returns
- HtmlNode
The cloned node.
CloneNode(string, bool)
Creates a duplicate of the node and changes its name at the same time.
public HtmlNode CloneNode(string newName, bool deep)
Parameters
newName
stringThe new name of the cloned node. May not be null.
deep
booltrue to recursively clone the subtree under the specified node; false to clone only the node itself.
Returns
- HtmlNode
The cloned node.
CopyFrom(HtmlNode)
Creates a duplicate of the node and the subtree under it.
public void CopyFrom(HtmlNode node)
Parameters
node
HtmlNodeThe node to duplicate. May not be null.
CopyFrom(HtmlNode, bool)
Creates a duplicate of the node.
public void CopyFrom(HtmlNode node, bool deep)
Parameters
node
HtmlNodeThe node to duplicate. May not be null.
deep
booltrue to recursively clone the subtree under the specified node, false to clone only the node itself.
CreateNavigator()
Creates a new XPathNavigator object for navigating this HTML node.
public XPathNavigator CreateNavigator()
Returns
- XPathNavigator
An XPathNavigator object. The XPathNavigator is positioned on the node from which the method was called. It is not positioned on the root of the document.
CreateNode(string)
Creates an HTML node from a string representing literal HTML.
public static HtmlNode CreateNode(string html)
Parameters
html
stringThe HTML text.
Returns
- HtmlNode
The newly created node instance.
CreateNode(string, Action<HtmlDocument>)
Creates an HTML node from a string representing literal HTML.
public static HtmlNode CreateNode(string html, Action<HtmlDocument> htmlDocumentBuilder)
Parameters
html
stringThe HTML text.
htmlDocumentBuilder
Action<HtmlDocument>The HTML Document builder.
Returns
- HtmlNode
The newly created node instance.
CreateRootNavigator()
Creates an XPathNavigator using the root of this document.
public XPathNavigator CreateRootNavigator()
Returns
- XPathNavigator
DescendantNodes(int)
Gets all descendant nodes of this node, including the descendants of each of its child nodes
[Obsolete("Use Descendants() instead, the results of this function will change in a future version")]
public IEnumerable<HtmlNode> DescendantNodes(int level = 0)
Parameters
level
intThe depth level of the node to parse in the html tree
Returns
- IEnumerable<HtmlNode>
the current element as an HtmlNode
DescendantNodesAndSelf()
Returns a collection of all descendant nodes of this element, in document order
[Obsolete("Use DescendantsAndSelf() instead, the results of this function will change in a future version")]
public IEnumerable<HtmlNode> DescendantNodesAndSelf()
Returns
Descendants()
Gets all Descendant nodes in enumerated list
public IEnumerable<HtmlNode> Descendants()
Returns
Descendants(int)
Gets all Descendant nodes in enumerated list
public IEnumerable<HtmlNode> Descendants(int level)
Parameters
level
int
Returns
Descendants(string)
Get all descendant nodes with matching name
public IEnumerable<HtmlNode> Descendants(string name)
Parameters
name
string
Returns
DescendantsAndSelf()
Returns a collection of all descendant nodes of this element, in document order
public IEnumerable<HtmlNode> DescendantsAndSelf()
Returns
DescendantsAndSelf(string)
Gets all descendant nodes including this node
public IEnumerable<HtmlNode> DescendantsAndSelf(string name)
Parameters
name
string
Returns
Element(string)
Gets first generation child node matching name
public HtmlNode Element(string name)
Parameters
name
string
Returns
Elements(string)
Gets first generation child nodes matching name
public IEnumerable<HtmlNode> Elements(string name)
Parameters
name
string
Returns
GetAttributeValue(string, bool)
Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
public bool GetAttributeValue(string name, bool def)
Parameters
name
stringThe name of the attribute to get. May not be null.
def
boolThe default value to return if not found.
Returns
- bool
The value of the attribute if found, the default value if not found.
GetAttributeValue(string, int)
Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
public int GetAttributeValue(string name, int def)
Parameters
name
stringThe name of the attribute to get. May not be null.
def
intThe default value to return if not found.
Returns
- int
The value of the attribute if found, the default value if not found.
GetAttributeValue(string, string)
Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
public string GetAttributeValue(string name, string def)
Parameters
name
stringThe name of the attribute to get. May not be null.
def
stringThe default value to return if not found.
Returns
- string
The value of the attribute if found, the default value if not found.
GetAttributeValue<T>(string, T)
Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
public T GetAttributeValue<T>(string name, T def)
Parameters
name
stringThe name of the attribute to get. May not be null.
def
TThe default value to return if not found.
Returns
- T
The value of the attribute if found, the default value if not found.
Type Parameters
T
GetAttributes()
Gets the attributes in this collection.
public IEnumerable<HtmlAttribute> GetAttributes()
Returns
- IEnumerable<HtmlAttribute>
An enumerator that allows foreach to be used to process the attributes in this collection.
GetAttributes(params string[])
Gets the attributes in this collection.
public IEnumerable<HtmlAttribute> GetAttributes(params string[] attributeNames)
Parameters
attributeNames
string[]A variable-length parameters list containing attribute names.
Returns
- IEnumerable<HtmlAttribute>
An enumerator that allows foreach to be used to process the attributes in this collection.
GetClasses()
Gets the CSS Class from the node.
public IEnumerable<string> GetClasses()
Returns
- IEnumerable<string>
The CSS Class from the node
GetDataAttribute(string)
Gets data attribute.
public HtmlAttribute GetDataAttribute(string key)
Parameters
key
stringThe key.
Returns
- HtmlAttribute
The data attribute.
GetDataAttributes()
Gets the data attributes in this collection.
public IEnumerable<HtmlAttribute> GetDataAttributes()
Returns
- IEnumerable<HtmlAttribute>
An enumerator that allows foreach to be used to process the data attributes in this collection.
GetDirectInnerText()
Gets direct inner text.
public virtual string GetDirectInnerText()
Returns
- string
The direct inner text.
GetEncapsulatedData(Type, HtmlDocument)
Fill an object and go through its properties and fill them too.
public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = null)
Parameters
targetType
TypeType of object to fill. It should have at least one property that defines an XPath.
htmlDocument
HtmlDocumentIf htmlDocument includes data, leave this parameter null. Otherwise, pass your specific HtmlDocument.
Returns
- object
Returns an object of type targetType including Encapsulated data.
Exceptions
- ArgumentException
Why it's thrown.
- ArgumentNullException
Why it's thrown.
- MissingMethodException
- MissingXPathException
- System.Xml.XPath.XPathException
XPathException
- NodeNotFoundException
- NodeAttributeNotFoundException
- FormatException
Why it's thrown.
- Exception
Why it's thrown.
- InvalidNodeReturnTypeException
GetEncapsulatedData<T>()
Fill an object and go through its properties and fill them too.
public T GetEncapsulatedData<T>()
Returns
- T
Returns an object of type T including Encapsulated data.
Type Parameters
T
Type of object to fill. It should have at least one property that defines an XPath.
Exceptions
- ArgumentException
Why it's thrown.
- ArgumentNullException
Why it's thrown.
- MissingMethodException
- MissingXPathException
- System.Xml.XPath.XPathException
XPathException
- NodeNotFoundException
- NodeAttributeNotFoundException
- FormatException
Why it's thrown.
- Exception
Why it's thrown.
- InvalidNodeReturnTypeException
GetEncapsulatedData<T>(HtmlDocument)
Fill an object and go through its properties and fill them too.
public T GetEncapsulatedData<T>(HtmlDocument htmlDocument)
Parameters
htmlDocument
HtmlDocumentIf htmlDocument includes data, leave this parameter null. Otherwise, pass your specific HtmlDocument.
Returns
- T
Returns an object of type T including Encapsulated data.
Type Parameters
T
Type of object to fill. It should have at least one property that defines an XPath.
Exceptions
- ArgumentException
Why it's thrown.
- ArgumentNullException
Why it's thrown.
- MissingMethodException
- MissingXPathException
- System.Xml.XPath.XPathException
XPathException
- NodeNotFoundException
- NodeAttributeNotFoundException
- FormatException
Why it's thrown.
- Exception
Why it's thrown.
- InvalidNodeReturnTypeException
HasClass(string)
Check if the node class has the parameter class.
public bool HasClass(string className)
Parameters
className
string
Returns
- bool
True if node class has the parameter class, false if not.
InsertAfter(HtmlNode, HtmlNode)
Inserts the specified node immediately after the specified reference node.
public HtmlNode InsertAfter(HtmlNode newChild, HtmlNode refChild)
Parameters
newChild
HtmlNodeThe node to insert. May not be null.
refChild
HtmlNodeThe node that is the reference node. The newChild is placed after the refChild.
Returns
- HtmlNode
The node being inserted.
InsertBefore(HtmlNode, HtmlNode)
Inserts the specified node immediately before the specified reference node.
public HtmlNode InsertBefore(HtmlNode newChild, HtmlNode refChild)
Parameters
newChild
HtmlNodeThe node to insert. May not be null.
refChild
HtmlNodeThe node that is the reference node. The newChild is placed before this node.
Returns
- HtmlNode
The node being inserted.
IsCDataElement(string)
Determines if an element node is a CDATA element node.
public static bool IsCDataElement(string name)
Parameters
name
stringThe name of the element node to check. May not be null.
Returns
- bool
true if the name is the name of a CDATA element node, false otherwise.
IsClosedElement(string)
Determines if an element node is closed.
public static bool IsClosedElement(string name)
Parameters
name
stringThe name of the element node to check. May not be null.
Returns
- bool
true if the name is the name of a closed element node, false otherwise.
IsEmptyElement(string)
Determines if an element node is defined as empty.
public static bool IsEmptyElement(string name)
Parameters
name
stringThe name of the element node to check. May not be null.
Returns
- bool
true if the name is the name of an empty element node, false otherwise.
IsOverlappedClosingElement(string)
Determines if a text corresponds to the closing tag of a node that can be kept overlapped.
public static bool IsOverlappedClosingElement(string text)
Parameters
text
stringThe text to check. May not be null.
Returns
- bool
true or false.
MoveChild(HtmlNode)
Move a node already associated and append it to this node instead.
public void MoveChild(HtmlNode child)
Parameters
child
HtmlNodeThe child node to move.
MoveChildren(HtmlNodeCollection)
Move a children collection already associated and append it to this node instead.
public void MoveChildren(HtmlNodeCollection children)
Parameters
children
HtmlNodeCollectionThe children collection already associated to move to another node.
PrependChild(HtmlNode)
Adds the specified node to the beginning of the list of children of this node.
public HtmlNode PrependChild(HtmlNode newChild)
Parameters
newChild
HtmlNodeThe node to add. May not be null.
Returns
- HtmlNode
The node added.
PrependChildren(HtmlNodeCollection)
Adds the specified node list to the beginning of the list of children of this node.
public void PrependChildren(HtmlNodeCollection newChildren)
Parameters
newChildren
HtmlNodeCollectionThe node list to add. May not be null.
Remove()
Removes node from parent collection
public void Remove()
RemoveAll()
Removes all the children and/or attributes of the current node.
public void RemoveAll()
RemoveAllChildren()
Removes all the children of the current node.
public void RemoveAllChildren()
RemoveAllIDforNode(HtmlNode)
Removes all id for node described by node.
public void RemoveAllIDforNode(HtmlNode node)
Parameters
node
HtmlNodeThe node.
RemoveChild(HtmlNode)
Removes the specified child node.
public HtmlNode RemoveChild(HtmlNode oldChild)
Parameters
oldChild
HtmlNodeThe node being removed. May not be null.
Returns
- HtmlNode
The node removed.
RemoveChild(HtmlNode, bool)
Removes the specified child node.
public HtmlNode RemoveChild(HtmlNode oldChild, bool keepGrandChildren)
Parameters
oldChild
HtmlNodeThe node being removed. May not be null.
keepGrandChildren
booltrue to keep grand children of the node, false otherwise.
Returns
- HtmlNode
The node removed.
RemoveChildren(HtmlNodeCollection)
Removes the children collection for this node.
public void RemoveChildren(HtmlNodeCollection oldChildren)
Parameters
oldChildren
HtmlNodeCollectionThe old children collection to remove.
RemoveClass()
Removes the class attribute from the node.
public void RemoveClass()
RemoveClass(bool)
Removes the class attribute from the node.
public void RemoveClass(bool throwError)
Parameters
throwError
booltrue to throw Error if class name doesn't exist, false otherwise.
RemoveClass(string)
Removes the specified class from the node.
public void RemoveClass(string name)
Parameters
name
stringThe class being removed. May not be null.
RemoveClass(string, bool)
Removes the specified class from the node.
public void RemoveClass(string name, bool throwError)
Parameters
name
stringThe class being removed. May not be null.
throwError
booltrue to throw Error if class name doesn't exist, false otherwise.
ReplaceChild(HtmlNode, HtmlNode)
Replaces the child node oldChild with newChild node.
public HtmlNode ReplaceChild(HtmlNode newChild, HtmlNode oldChild)
Parameters
newChild
HtmlNodeThe new node to put in the child list.
oldChild
HtmlNodeThe node being replaced in the list.
Returns
- HtmlNode
The node replaced.
ReplaceClass(string, string)
Replaces the class name oldClass with newClass name.
public void ReplaceClass(string newClass, string oldClass)
Parameters
newClass
stringThe new class name.
oldClass
stringThe class being replaced.
ReplaceClass(string, string, bool)
Replaces the class name oldClass with newClass name.
public void ReplaceClass(string newClass, string oldClass, bool throwError)
Parameters
newClass
stringThe new class name.
oldClass
stringThe class being replaced.
throwError
booltrue to throw Error if class name doesn't exist, false otherwise.
SelectNodes(string)
Selects a list of nodes matching the XPath expression.
public HtmlNodeCollection SelectNodes(string xpath)
Parameters
xpath
stringThe XPath expression.
Returns
- HtmlNodeCollection
An HtmlNodeCollection containing a collection of nodes matching the XPath query, or null if no node matched the XPath expression.
SelectNodes(XPathExpression)
Selects a list of nodes matching the XPath expression.
public HtmlNodeCollection SelectNodes(XPathExpression xpath)
Parameters
xpath
XPathExpressionThe XPath expression.
Returns
- HtmlNodeCollection
An HtmlNodeCollection containing a collection of nodes matching the XPath query, or null if no node matched the XPath expression.
SelectSingleNode(string)
Selects the first HtmlNode that matches the XPath expression.
public HtmlNode SelectSingleNode(string xpath)
Parameters
xpath
stringThe XPath expression. May not be null.
Returns
- HtmlNode
The first HtmlNode that matches the XPath query or a null reference if no matching node was found.
SelectSingleNode(XPathExpression)
Selects the first HtmlNode that matches the XPath expression.
public HtmlNode SelectSingleNode(XPathExpression xpath)
Parameters
xpath
XPathExpressionThe XPath expression.
Returns
- HtmlNode
The first HtmlNode that matches the XPath query, or null if no matching node was found.
SetAttributeValue(string, string)
Helper method to set the value of an attribute of this node. If the attribute is not found, it will be created automatically.
public HtmlAttribute SetAttributeValue(string name, string value)
Parameters
name
stringThe name of the attribute to set. May not be null.
value
stringThe value for the attribute.
Returns
- HtmlAttribute
The corresponding attribute instance.
SetChildNodesId(HtmlNode)
Sets child nodes identifier.
public void SetChildNodesId(HtmlNode chilNode)
Parameters
chilNode
HtmlNodeThe child node.
SetParent(HtmlNode)
Sets the parent Html node and properly determines the current node's depth using the parent node's depth.
public void SetParent(HtmlNode parent)
Parameters
parent
HtmlNode
WriteContentTo()
Saves all the children of the node to a string.
public string WriteContentTo()
Returns
- string
The saved string.
WriteContentTo(TextWriter, int)
Saves all the children of the node to the specified TextWriter.
public void WriteContentTo(TextWriter outText, int level = 0)
Parameters
outText
TextWriterThe TextWriter to which you want to save.
level
intIdentifies the level we are in starting at root with 0
WriteTo()
Saves the current node to a string.
public string WriteTo()
Returns
- string
The saved string.
WriteTo(TextWriter, int)
Saves the current node to the specified TextWriter.
public virtual void WriteTo(TextWriter outText, int level = 0)
Parameters
outText
TextWriterThe TextWriter to which you want to save.
level
intidentifies the level we are in starting at root with 0
WriteTo(XmlWriter)
Saves the current node to the specified XmlWriter.
public void WriteTo(XmlWriter writer)
Parameters
writer
XmlWriterThe XmlWriter to which you want to save.