Table of Contents

Class HtmlNode

Namespace
HtmlAgilityPack
Assembly
HtmlAgilityPack.dll

Represents an HTML node.

public class HtmlNode : IXPathNavigable
Inheritance
HtmlNode
Implements
IXPathNavigable
Derived
Inherited Members

Constructors

HtmlNode(HtmlNodeType, HtmlDocument, int)

Initializes HtmlNode, providing type, owner and where it exists in a collection

public HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)

Parameters

type HtmlNodeType
ownerdocument HtmlDocument
index int

Fields

ElementsFlags

Gets a collection of flags that define specific behaviors for specific element nodes. The table contains a DictionaryEntry list with the lowercase tag name as the Key, and a combination of HtmlElementFlags as the Value.

public static Dictionary<string, HtmlElementFlag> ElementsFlags

Field Value

Dictionary<string, HtmlElementFlag>

HtmlNodeTypeNameComment

Gets the name of a comment node. It is actually defined as '#comment'.

public static readonly string HtmlNodeTypeNameComment

Field Value

string

HtmlNodeTypeNameDocument

Gets the name of the document node. It is actually defined as '#document'.

public static readonly string HtmlNodeTypeNameDocument

Field Value

string

HtmlNodeTypeNameText

Gets the name of a text node. It is actually defined as '#text'.

public static readonly string HtmlNodeTypeNameText

Field Value

string

Properties

Attributes

Gets the collection of HTML attributes for this node. May not be null.

public HtmlAttributeCollection Attributes { get; }

Property Value

HtmlAttributeCollection

ChildNodes

Gets all the children of the node.

public HtmlNodeCollection ChildNodes { get; }

Property Value

HtmlNodeCollection

Closed

Gets a value indicating if this node has been closed or not.

public bool Closed { get; }

Property Value

bool

ClosingAttributes

Gets the collection of HTML attributes for the closing tag. May not be null.

public HtmlAttributeCollection ClosingAttributes { get; }

Property Value

HtmlAttributeCollection

Depth

The depth of the node relative to the opening root html element. This value is used to determine if a document has too many nested html nodes, which can cause stack overflows.

public int Depth { get; set; }

Property Value

int

EndNode

Gets the closing tag of the node, null if the node is self-closing.

public HtmlNode EndNode { get; }

Property Value

HtmlNode

FirstChild

Gets the first child of the node.

public HtmlNode FirstChild { get; }

Property Value

HtmlNode

HasAttributes

Gets a value indicating whether the current node has any attributes.

public bool HasAttributes { get; }

Property Value

bool

HasChildNodes

Gets a value indicating whether this node has any child nodes.

public bool HasChildNodes { get; }

Property Value

bool

HasClosingAttributes

Gets a value indicating whether the current node has any attributes on the closing tag.

public bool HasClosingAttributes { get; }

Property Value

bool

Id

Gets or sets the value of the 'id' HTML attribute. The document must have been parsed using the OptionUseIdAttribute set to true.

public string Id { get; set; }

Property Value

string

InnerHtml

Gets or Sets the HTML between the start and end tags of the object.

public virtual string InnerHtml { get; set; }

Property Value

string

InnerLength

Gets the length of the area between the opening and closing tag of the node.

public int InnerLength { get; }

Property Value

int

InnerStartIndex

Gets the stream position of the area between the opening and closing tag of the node, relative to the start of the document.

public int InnerStartIndex { get; }

Property Value

int

InnerText

Gets the text between the start and end tags of the object.

public virtual string InnerText { get; }

Property Value

string

LastChild

Gets the last child of the node.

public HtmlNode LastChild { get; }

Property Value

HtmlNode

Line

Gets the line number of this node in the document.

public int Line { get; }

Property Value

int

LinePosition

Gets the column number of this node in the document.

public int LinePosition { get; }

Property Value

int

Name

Gets or sets this node's name.

public string Name { get; set; }

Property Value

string

NextSibling

Gets the HTML node immediately following this element.

public HtmlNode NextSibling { get; }

Property Value

HtmlNode

NodeType

Gets the type of this node.

public HtmlNodeType NodeType { get; }

Property Value

HtmlNodeType

OriginalName

The original unaltered name of the tag

public string OriginalName { get; }

Property Value

string

OuterHtml

Gets the object and its content in HTML.

public virtual string OuterHtml { get; }

Property Value

string

OuterLength

Gets the length of the entire node, opening and closing tag included.

public int OuterLength { get; }

Property Value

int

OuterStartIndex

Gets the stream position of the area of the beginning of the tag, relative to the start of the document.

public int OuterStartIndex { get; }

Property Value

int

OwnerDocument

Gets the HtmlDocument to which this node belongs.

public HtmlDocument OwnerDocument { get; }

Property Value

HtmlDocument

ParentNode

Gets the parent of this node (for nodes that can have parents).

public HtmlNode ParentNode { get; }

Property Value

HtmlNode

PreviousSibling

Gets the node immediately preceding this node.

public HtmlNode PreviousSibling { get; }

Property Value

HtmlNode

StreamPosition

Gets the stream position of this node in the document, relative to the start of the document.

public int StreamPosition { get; }

Property Value

int

XPath

Gets a valid XPath string that points to this node

public string XPath { get; }

Property Value

string

Methods

AddClass(string)

Adds one or more classes to this node.

public void AddClass(string name)

Parameters

name string

The class name(s) to add. May not be null.

AddClass(string, bool)

Adds one or more classes to this node.

public void AddClass(string name, bool throwError)

Parameters

name string

The class name(s) to add. May not be null.

throwError bool

true to throw Error if class name exists, false otherwise.

Ancestors()

Returns a collection of all ancestor nodes of this element.

public IEnumerable<HtmlNode> Ancestors()

Returns

IEnumerable<HtmlNode>

Ancestors(string)

Get Ancestors with matching name

public IEnumerable<HtmlNode> Ancestors(string name)

Parameters

name string

Returns

IEnumerable<HtmlNode>

AncestorsAndSelf()

Returns a collection containing this element and all of its ancestor nodes.

public IEnumerable<HtmlNode> AncestorsAndSelf()

Returns

IEnumerable<HtmlNode>

AncestorsAndSelf(string)

Gets all ancestor nodes, and the current node, with a matching name

public IEnumerable<HtmlNode> AncestorsAndSelf(string name)

Parameters

name string

Returns

IEnumerable<HtmlNode>

AppendChild(HtmlNode)

Adds the specified node to the end of the list of children of this node.

public HtmlNode AppendChild(HtmlNode newChild)

Parameters

newChild HtmlNode

The node to add. May not be null.

Returns

HtmlNode

The node added.

AppendChildren(HtmlNodeCollection)

Adds the specified node list to the end of the list of children of this node.

public void AppendChildren(HtmlNodeCollection newChildren)

Parameters

newChildren HtmlNodeCollection

The node list to add. May not be null.

CanOverlapElement(string)

Determines if an element node can be kept overlapped.

public static bool CanOverlapElement(string name)

Parameters

name string

The name of the element node to check. May not be null.

Returns

bool

true if the name is the name of an element node that can be kept overlapped, false otherwise.

ChildAttributes(string)

Gets all Attributes with name

public IEnumerable<HtmlAttribute> ChildAttributes(string name)

Parameters

name string

Returns

IEnumerable<HtmlAttribute>

Clone()

Creates a duplicate of the node

public HtmlNode Clone()

Returns

HtmlNode

CloneNode(bool)

Creates a duplicate of the node.

public HtmlNode CloneNode(bool deep)

Parameters

deep bool

true to recursively clone the subtree under the specified node; false to clone only the node itself.

Returns

HtmlNode

The cloned node.

CloneNode(string)

Creates a duplicate of the node and changes its name at the same time.

public HtmlNode CloneNode(string newName)

Parameters

newName string

The new name of the cloned node. May not be null.

Returns

HtmlNode

The cloned node.

CloneNode(string, bool)

Creates a duplicate of the node and changes its name at the same time.

public HtmlNode CloneNode(string newName, bool deep)

Parameters

newName string

The new name of the cloned node. May not be null.

deep bool

true to recursively clone the subtree under the specified node; false to clone only the node itself.

Returns

HtmlNode

The cloned node.

CopyFrom(HtmlNode)

Creates a duplicate of the node and the subtree under it.

public void CopyFrom(HtmlNode node)

Parameters

node HtmlNode

The node to duplicate. May not be null.

CopyFrom(HtmlNode, bool)

Creates a duplicate of the node.

public void CopyFrom(HtmlNode node, bool deep)

Parameters

node HtmlNode

The node to duplicate. May not be null.

deep bool

true to recursively clone the subtree under the specified node, false to clone only the node itself.

CreateNavigator()

Creates a new XPathNavigator object for navigating this HTML node.

public XPathNavigator CreateNavigator()

Returns

XPathNavigator

An XPathNavigator object. The XPathNavigator is positioned on the node from which the method was called. It is not positioned on the root of the document.

CreateNode(string)

Creates an HTML node from a string representing literal HTML.

public static HtmlNode CreateNode(string html)

Parameters

html string

The HTML text.

Returns

HtmlNode

The newly created node instance.

CreateNode(string, Action<HtmlDocument>)

Creates an HTML node from a string representing literal HTML.

public static HtmlNode CreateNode(string html, Action<HtmlDocument> htmlDocumentBuilder)

Parameters

html string

The HTML text.

htmlDocumentBuilder Action<HtmlDocument>

The HTML Document builder.

Returns

HtmlNode

The newly created node instance.

CreateRootNavigator()

Creates an XPathNavigator using the root of this document.

public XPathNavigator CreateRootNavigator()

Returns

XPathNavigator

DescendantNodes(int)

Gets all Descendant nodes for this node and each of child nodes

[Obsolete("Use Descendants() instead, the results of this function will change in a future version")]
public IEnumerable<HtmlNode> DescendantNodes(int level = 0)

Parameters

level int

The depth level of the node to parse in the html tree

Returns

IEnumerable<HtmlNode>

the current element as an HtmlNode

DescendantNodesAndSelf()

Returns a collection of all descendant nodes of this element, in document order

[Obsolete("Use DescendantsAndSelf() instead, the results of this function will change in a future version")]
public IEnumerable<HtmlNode> DescendantNodesAndSelf()

Returns

IEnumerable<HtmlNode>

Descendants()

Gets all Descendant nodes in enumerated list

public IEnumerable<HtmlNode> Descendants()

Returns

IEnumerable<HtmlNode>

Descendants(int)

Gets all Descendant nodes in enumerated list

public IEnumerable<HtmlNode> Descendants(int level)

Parameters

level int

Returns

IEnumerable<HtmlNode>

Descendants(string)

Get all descendant nodes with matching name

public IEnumerable<HtmlNode> Descendants(string name)

Parameters

name string

Returns

IEnumerable<HtmlNode>

DescendantsAndSelf()

Returns a collection of all descendant nodes of this element, in document order

public IEnumerable<HtmlNode> DescendantsAndSelf()

Returns

IEnumerable<HtmlNode>

DescendantsAndSelf(string)

Gets all descendant nodes including this node

public IEnumerable<HtmlNode> DescendantsAndSelf(string name)

Parameters

name string

Returns

IEnumerable<HtmlNode>

Element(string)

Gets first generation child node matching name

public HtmlNode Element(string name)

Parameters

name string

Returns

HtmlNode

Elements(string)

Gets matching first generation child nodes matching name

public IEnumerable<HtmlNode> Elements(string name)

Parameters

name string

Returns

IEnumerable<HtmlNode>

GetAttributeValue(string, bool)

Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.

public bool GetAttributeValue(string name, bool def)

Parameters

name string

The name of the attribute to get. May not be null.

def bool

The default value to return if not found.

Returns

bool

The value of the attribute if found, the default value if not found.

GetAttributeValue(string, int)

Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.

public int GetAttributeValue(string name, int def)

Parameters

name string

The name of the attribute to get. May not be null.

def int

The default value to return if not found.

Returns

int

The value of the attribute if found, the default value if not found.

GetAttributeValue(string, string)

Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.

public string GetAttributeValue(string name, string def)

Parameters

name string

The name of the attribute to get. May not be null.

def string

The default value to return if not found.

Returns

string

The value of the attribute if found, the default value if not found.

GetAttributeValue<T>(string, T)

Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.

public T GetAttributeValue<T>(string name, T def)

Parameters

name string

The name of the attribute to get. May not be null.

def T

The default value to return if not found.

Returns

T

The value of the attribute if found, the default value if not found.

Type Parameters

T

GetAttributes()

Gets the attributes in this collection.

public IEnumerable<HtmlAttribute> GetAttributes()

Returns

IEnumerable<HtmlAttribute>

An enumerator that allows foreach to be used to process the attributes in this collection.

GetAttributes(params string[])

Gets the attributes in this collection.

public IEnumerable<HtmlAttribute> GetAttributes(params string[] attributeNames)

Parameters

attributeNames string[]

A variable-length parameters list containing attribute names.

Returns

IEnumerable<HtmlAttribute>

An enumerator that allows foreach to be used to process the attributes in this collection.

GetClasses()

Gets the CSS Class from the node.

public IEnumerable<string> GetClasses()

Returns

IEnumerable<string>

The CSS Class from the node

GetDataAttribute(string)

Gets data attribute.

public HtmlAttribute GetDataAttribute(string key)

Parameters

key string

The key.

Returns

HtmlAttribute

The data attribute.

GetDataAttributes()

Gets the data attributes in this collection.

public IEnumerable<HtmlAttribute> GetDataAttributes()

Returns

IEnumerable<HtmlAttribute>

An enumerator that allows foreach to be used to process the data attributes in this collection.

GetDirectInnerText()

Gets direct inner text.

public virtual string GetDirectInnerText()

Returns

string

The direct inner text.

GetEncapsulatedData(Type, HtmlDocument)

Fill an object and go through its properties and fill them too.

public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = null)

Parameters

targetType Type

Type of object you want to fill. It should have at least one property that defines an XPath.

htmlDocument HtmlDocument

If htmlDocument includes data, leave this parameter null. Otherwise, pass your specific HtmlDocument.

Returns

object

Returns an object of type targetType including Encapsulated data.

Exceptions

ArgumentException

Why it's thrown.

ArgumentNullException

Why it's thrown.

MissingMethodException

MissingMethodException

MissingXPathException

MissingXPathException

System.Xml.XPath.XPathException

XPathException

NodeNotFoundException

NodeNotFoundException

NodeAttributeNotFoundException

NodeAttributeNotFoundException

FormatException

Why it's thrown.

Exception

Why it's thrown.

InvalidNodeReturnTypeException

InvalidNodeReturnTypeException

GetEncapsulatedData<T>()

Fill an object and go through its properties and fill them too.

public T GetEncapsulatedData<T>()

Returns

T

Returns an object of type T including Encapsulated data.

Type Parameters

T

Type of object you want to fill. It should have at least one property that defines an XPath.

Exceptions

ArgumentException

Why it's thrown.

ArgumentNullException

Why it's thrown.

MissingMethodException

MissingMethodException

MissingXPathException

MissingXPathException

System.Xml.XPath.XPathException

XPathException

NodeNotFoundException

NodeNotFoundException

NodeAttributeNotFoundException

NodeAttributeNotFoundException

FormatException

Why it's thrown.

Exception

Why it's thrown.

InvalidNodeReturnTypeException

InvalidNodeReturnTypeException

GetEncapsulatedData<T>(HtmlDocument)

Fill an object and go through its properties and fill them too.

public T GetEncapsulatedData<T>(HtmlDocument htmlDocument)

Parameters

htmlDocument HtmlDocument

If htmlDocument includes data, leave this parameter null. Otherwise, pass your specific HtmlDocument.

Returns

T

Returns an object of type T including Encapsulated data.

Type Parameters

T

Type of object you want to fill. It should have at least one property that defines an XPath.

Exceptions

ArgumentException

Why it's thrown.

ArgumentNullException

Why it's thrown.

MissingMethodException

MissingMethodException

MissingXPathException

MissingXPathException

System.Xml.XPath.XPathException

XPathException

NodeNotFoundException

NodeNotFoundException

NodeAttributeNotFoundException

NodeAttributeNotFoundException

FormatException

Why it's thrown.

Exception

Why it's thrown.

InvalidNodeReturnTypeException

InvalidNodeReturnTypeException

HasClass(string)

Check if the node class has the parameter class.

public bool HasClass(string className)

Parameters

className string

Returns

bool

True if node class has the parameter class, false if not.

InsertAfter(HtmlNode, HtmlNode)

Inserts the specified node immediately after the specified reference node.

public HtmlNode InsertAfter(HtmlNode newChild, HtmlNode refChild)

Parameters

newChild HtmlNode

The node to insert. May not be null.

refChild HtmlNode

The node that is the reference node. The newChild is placed after this node.

Returns

HtmlNode

The node being inserted.

InsertBefore(HtmlNode, HtmlNode)

Inserts the specified node immediately before the specified reference node.

public HtmlNode InsertBefore(HtmlNode newChild, HtmlNode refChild)

Parameters

newChild HtmlNode

The node to insert. May not be null.

refChild HtmlNode

The node that is the reference node. The newChild is placed before this node.

Returns

HtmlNode

The node being inserted.

IsCDataElement(string)

Determines if an element node is a CDATA element node.

public static bool IsCDataElement(string name)

Parameters

name string

The name of the element node to check. May not be null.

Returns

bool

true if the name is the name of a CDATA element node, false otherwise.

IsClosedElement(string)

Determines if an element node is closed.

public static bool IsClosedElement(string name)

Parameters

name string

The name of the element node to check. May not be null.

Returns

bool

true if the name is the name of a closed element node, false otherwise.

IsEmptyElement(string)

Determines if an element node is defined as empty.

public static bool IsEmptyElement(string name)

Parameters

name string

The name of the element node to check. May not be null.

Returns

bool

true if the name is the name of an empty element node, false otherwise.

IsOverlappedClosingElement(string)

Determines if a text corresponds to the closing tag of a node that can be kept overlapped.

public static bool IsOverlappedClosingElement(string text)

Parameters

text string

The text to check. May not be null.

Returns

bool

true or false.

MoveChild(HtmlNode)

Move a node already associated and append it to this node instead.

public void MoveChild(HtmlNode child)

Parameters

child HtmlNode

The child node to move.

MoveChildren(HtmlNodeCollection)

Move a children collection already associated and append it to this node instead.

public void MoveChildren(HtmlNodeCollection children)

Parameters

children HtmlNodeCollection

The children collection already associated to move to another node.

PrependChild(HtmlNode)

Adds the specified node to the beginning of the list of children of this node.

public HtmlNode PrependChild(HtmlNode newChild)

Parameters

newChild HtmlNode

The node to add. May not be null.

Returns

HtmlNode

The node added.

PrependChildren(HtmlNodeCollection)

Adds the specified node list to the beginning of the list of children of this node.

public void PrependChildren(HtmlNodeCollection newChildren)

Parameters

newChildren HtmlNodeCollection

The node list to add. May not be null.

Remove()

Removes node from parent collection

public void Remove()

RemoveAll()

Removes all the children and/or attributes of the current node.

public void RemoveAll()

RemoveAllChildren()

Removes all the children of the current node.

public void RemoveAllChildren()

RemoveAllIDforNode(HtmlNode)

Removes all id for node described by node.

public void RemoveAllIDforNode(HtmlNode node)

Parameters

node HtmlNode

The node.

RemoveChild(HtmlNode)

Removes the specified child node.

public HtmlNode RemoveChild(HtmlNode oldChild)

Parameters

oldChild HtmlNode

The node being removed. May not be null.

Returns

HtmlNode

The node removed.

RemoveChild(HtmlNode, bool)

Removes the specified child node.

public HtmlNode RemoveChild(HtmlNode oldChild, bool keepGrandChildren)

Parameters

oldChild HtmlNode

The node being removed. May not be null.

keepGrandChildren bool

true to keep grand children of the node, false otherwise.

Returns

HtmlNode

The node removed.

RemoveChildren(HtmlNodeCollection)

Removes the children collection for this node.

public void RemoveChildren(HtmlNodeCollection oldChildren)

Parameters

oldChildren HtmlNodeCollection

The old children collection to remove.

RemoveClass()

Removes the class attribute from the node.

public void RemoveClass()

RemoveClass(bool)

Removes the class attribute from the node.

public void RemoveClass(bool throwError)

Parameters

throwError bool

true to throw Error if class name doesn't exist, false otherwise.

RemoveClass(string)

Removes the specified class from the node.

public void RemoveClass(string name)

Parameters

name string

The class being removed. May not be null.

RemoveClass(string, bool)

Removes the specified class from the node.

public void RemoveClass(string name, bool throwError)

Parameters

name string

The class being removed. May not be null.

throwError bool

true to throw Error if class name doesn't exist, false otherwise.

ReplaceChild(HtmlNode, HtmlNode)

Replaces the child node oldChild with newChild node.

public HtmlNode ReplaceChild(HtmlNode newChild, HtmlNode oldChild)

Parameters

newChild HtmlNode

The new node to put in the child list.

oldChild HtmlNode

The node being replaced in the list.

Returns

HtmlNode

The node replaced.

ReplaceClass(string, string)

Replaces the class name oldClass with newClass name.

public void ReplaceClass(string newClass, string oldClass)

Parameters

newClass string

The new class name.

oldClass string

The class being replaced.

ReplaceClass(string, string, bool)

Replaces the class name oldClass with newClass name.

public void ReplaceClass(string newClass, string oldClass, bool throwError)

Parameters

newClass string

The new class name.

oldClass string

The class being replaced.

throwError bool

true to throw Error if class name doesn't exist, false otherwise.

SelectNodes(string)

Selects a list of nodes matching the XPath expression.

public HtmlNodeCollection SelectNodes(string xpath)

Parameters

xpath string

The XPath expression.

Returns

HtmlNodeCollection

An HtmlNodeCollection containing a collection of nodes matching the XPath query, or null if no node matched the XPath expression.

SelectNodes(XPathExpression)

Selects a list of nodes matching the XPath expression.

public HtmlNodeCollection SelectNodes(XPathExpression xpath)

Parameters

xpath XPathExpression

The XPath expression.

Returns

HtmlNodeCollection

An HtmlNodeCollection containing a collection of nodes matching the XPath query, or null if no node matched the XPath expression.

SelectSingleNode(string)

Selects the first HtmlNode that matches the XPath expression.

public HtmlNode SelectSingleNode(string xpath)

Parameters

xpath string

The XPath expression. May not be null.

Returns

HtmlNode

The first HtmlNode that matches the XPath query or a null reference if no matching node was found.

SelectSingleNode(XPathExpression)

Selects the first HtmlNode that matches the XPath expression.

public HtmlNode SelectSingleNode(XPathExpression xpath)

Parameters

xpath XPathExpression

The XPath expression.

Returns

HtmlNode

The first HtmlNode that matches the XPath query or a null reference if no matching node was found.

SetAttributeValue(string, string)

Helper method to set the value of an attribute of this node. If the attribute is not found, it will be created automatically.

public HtmlAttribute SetAttributeValue(string name, string value)

Parameters

name string

The name of the attribute to set. May not be null.

value string

The value for the attribute.

Returns

HtmlAttribute

The corresponding attribute instance.

SetChildNodesId(HtmlNode)

Sets child nodes identifier.

public void SetChildNodesId(HtmlNode chilNode)

Parameters

chilNode HtmlNode

The child node.

SetParent(HtmlNode)

Sets the parent Html node and properly determines the current node's depth using the parent node's depth.

public void SetParent(HtmlNode parent)

Parameters

parent HtmlNode

WriteContentTo()

Saves all the children of the node to a string.

public string WriteContentTo()

Returns

string

The saved string.

WriteContentTo(TextWriter, int)

Saves all the children of the node to the specified TextWriter.

public void WriteContentTo(TextWriter outText, int level = 0)

Parameters

outText TextWriter

The TextWriter to which you want to save.

level int

Identifies the level we are in starting at root with 0

WriteTo()

Saves the current node to a string.

public string WriteTo()

Returns

string

The saved string.

WriteTo(TextWriter, int)

Saves the current node to the specified TextWriter.

public virtual void WriteTo(TextWriter outText, int level = 0)

Parameters

outText TextWriter

The TextWriter to which you want to save.

level int

Identifies the level we are in starting at root with 0

WriteTo(XmlWriter)

Saves the current node to the specified XmlWriter.

public void WriteTo(XmlWriter writer)

Parameters

writer XmlWriter

The XmlWriter to which you want to save.