HTML::DOM::Node - A Perl class for representing the nodes of an HTML DOM tree


HTML-DOM documentation Contained in the HTML-DOM distribution.

Index


Code Index:

NAME

Top

HTML::DOM::Node - A Perl class for representing the nodes of an HTML DOM tree

SYNOPSIS

Top

  use HTML::DOM::Node ':all'; # constants
  use HTML::DOM;
  $doc = HTML::DOM->new;
  $doc->isa('HTML::DOM::Node'); # true
  $doc->nodeType == DOCUMENT_NODE; # true

  $doc->firstChild;
  $doc->childNodes;
  # etc

DESCRIPTION

Top

This is the base class for all nodes in an HTML::DOM tree. (See CLASSES AND DOM INTERFACES in HTML::DOM.) It implements the Node interface, and, indirectly, the EventTarget interface (see HTML::DOM::EventTarget.

METHODS

Top

Attributes

The following DOM attributes are supported:

nodeName
nodeType

These two are implemented not by HTML::DOM::Node itself, but by its subclasses.

nodeValue
parentNode
childNodes
firstChild
lastChild
previousSibling
nextSibling
attributes
ownerDocument
namespaceURI
prefix
localName

Those last three always return nothing.

There is also a _set_ownerDocument method, which you probably do not need to know about.

Other Methods

See the DOM spec. for descriptions of most of these. The first four automatically trigger mutation events. (See HTML::DOM::Event::Mutation.)

insertBefore
replaceChild
removeChild
appendChild
hasChildNodes
cloneNode
normalize
hasAttributes
isSupported
trigger_event

This overrides HTML::DOM::EventTarget's (non-DOM) method of the same name, so that the document's default event handler is called.

as_text
as_HTML

These two (non-DOM) methods of HTML::Element are overridden, so that they work correctly with comment and text nodes.

EXPORTS

Top

The following node type constants are exportable:

ELEMENT_NODE (1)
ATTRIBUTE_NODE (2)
TEXT_NODE (3)
CDATA_SECTION_NODE (4)
ENTITY_REFERENCE_NODE (5)
ENTITY_NODE (6)
PROCESSING_INSTRUCTION_NODE (7)
COMMENT_NODE (8)
DOCUMENT_NODE (9)
DOCUMENT_TYPE_NODE (10)
DOCUMENT_FRAGMENT_NODE (11)
NOTATION_NODE (12)

SEE ALSO

Top

HTML::DOM

HTML::DOM::EventTarget


HTML-DOM documentation Contained in the HTML-DOM distribution.
package HTML::DOM::Node;

our $VERSION = '0.048';


use strict;
use warnings;

use constant {
	ELEMENT_NODE                => 1,
	ATTRIBUTE_NODE              => 2,
	TEXT_NODE                   => 3,
	CDATA_SECTION_NODE          => 4,
	ENTITY_REFERENCE_NODE       => 5,
	ENTITY_NODE                 => 6,
	PROCESSING_INSTRUCTION_NODE => 7,
	COMMENT_NODE                => 8,
	DOCUMENT_NODE               => 9,
	DOCUMENT_TYPE_NODE          => 10,
	DOCUMENT_FRAGMENT_NODE      => 11,
	NOTATION_NODE               => 12,
};

use Exporter 5.57 'import';
use HTML::DOM::Event;
use HTML::DOM::Exception qw'NO_MODIFICATION_ALLOWED_ERR NOT_FOUND_ERR
                               HIERARCHY_REQUEST_ERR 
                                 UNSPECIFIED_EVENT_TYPE_ERR';
use Scalar::Util qw'refaddr weaken blessed';

require HTML::DOM::EventTarget;
require HTML::DOM::Implementation;
require HTML::DOM::NodeList;
require HTML::Element;

our @ISA =('HTML::Element', # No,  a node  isn't  an  HTML  element,  but
 'HTML::DOM::EventTarget'); # HTML::Element has some nice tree-handling
                            # methods (and, after all, TreeBuilder's
                            # pseudo-elements aren't elements either).

our @EXPORT_OK = qw'
	ELEMENT_NODE               
	ATTRIBUTE_NODE             
	TEXT_NODE                  
	CDATA_SECTION_NODE         
	ENTITY_REFERENCE_NODE      
	ENTITY_NODE                
	PROCESSING_INSTRUCTION_NODE
	COMMENT_NODE               
	DOCUMENT_NODE              
	DOCUMENT_TYPE_NODE         
	DOCUMENT_FRAGMENT_NODE     
	NOTATION_NODE              
';
our %EXPORT_TAGS = (all => \@EXPORT_OK);



# ----------- ATTRIBUTE METHODS ------------- #

# sub nodeName {} # every subclass overrides this
# sub nodeType {} # likewise

sub nodeValue {
	if(@_ > 1) {
		die new HTML::DOM::Exception
			NO_MODIFICATION_ALLOWED_ERR,
			'Read-only node';# ~~~ only when the node is
		                                 #     readonly
	}
	return; # empty list
}

sub parentNode {
	my $p = $_[0]->parent;
	defined $p ? $p :()
}

sub childNodes {
	wantarray ? $_[0]->content_list :
		new HTML::DOM::NodeList $_[0]->content_array_ref;
}

sub firstChild {
	($_[0]->content_list)[0];
}

sub lastChild {
	($_[0]->content_list)[-1];
}

sub previousSibling {
	my $sib = scalar $_[0]->left;
	defined $sib ? $sib : ();
}

sub nextSibling {
	my $sib = scalar $_[0]->right;
	defined $sib ? $sib : ();
}

sub attributes {} # null for most nodes; overridden by Element

sub ownerDocument {
	my $self = shift;
	$$self{_HTML_DOM_Node_owner} || do {
		my $root = $self->root;
		# ~~~ I’m not sure this logic is right. I need to revisit
		#     this. Do we ever have a case in which ->root returns
		#     the wrong value? If so, can we guarantee that the
		#     ‘root’ has its _HTML_DOM_Node_owner attribute set?
		$$self{_HTML_DOM_Node_owner} = 
			$$root{_HTML_DOM_Node_owner} || $root;
		weaken $$self{_HTML_DOM_Node_owner};
		$$self{_HTML_DOM_Node_owner}
	};
}

sub _set_ownerDocument {
	$_[0]{_HTML_DOM_Node_owner} = $_[1];
	weaken $_[0]{_HTML_DOM_Node_owner};
}

*prefix = *localName = *namespaceURI = *attributes;


# ----------- METHOD METHODS ------------- #

sub insertBefore {
	# ~~~ NO_MODIFICATION_ALLOWED_ERR is meant to be raised if the
	#     node is read-only.
	# ~~~ HIERARCHY_REQUEST_ERR is also supposed to be raised if the
	#     node type does not allow children of $new_node's type.

	my($self,$new_node,$before) = @_;

	$self->is_inside($new_node) and
		die new HTML::DOM::Exception HIERARCHY_REQUEST_ERR,
		'A node cannot be inserted into one of its descendants';

	my $doc = $self->ownerDocument || $self;

	my $index;
	my @kids = $self->content_list;
	if($before) { FIND_INDEX: {
		for (0..$#kids) {
			$kids[$_] == $before 
				and $index = $_, last FIND_INDEX;
		}
		die new HTML::DOM::Exception NOT_FOUND_ERR,
		'insertBefore\'s 2nd argument is not a child of this node';
	}}
	else {
		$index = @kids;
	}

#$new_node->can('parent') or warn JE::Code::add_line_number("cant parent");
	my $old_parent = $new_node->parent;
	$old_parent and $new_node->trigger_event('DOMNodeRemoved',
		rel_node => $old_parent);
	my $was_inside_doc = $new_node->is_inside($doc);
	if($was_inside_doc) {
		$_->trigger_event('DOMNodeRemovedFromDocument')
		  for $new_node, $new_node->descendants;
	}

	$self->splice_content($index, 0, my @nodes =
		$new_node->isa('HTML::DOM::DocumentFragment')
		? $new_node->childNodes
		: $new_node
	);
	$_->_set_ownerDocument($doc) for @nodes;

	$new_node->trigger_event('DOMNodeInserted', rel_node => $self);
	if($self->is_inside($doc)) {
		for($new_node, $new_node->descendants) {
			if(
			 !$was_inside_doc
			 and my $sub = $doc->elem_handler(lc $_->tag)
			) {
				&$sub($doc,$_)
			}
			$_->trigger_event('DOMNodeInsertedIntoDocument')
		}
	}
	$_->trigger_event('DOMSubtreeModified')
	  for _nearest_common_parent($old_parent, $self);

	$doc->_modified;

	$new_node;
}

sub replaceChild {
	# ~~~ NO_MODIFICATION_ALLOWED_ERR is meant to be raised if the
	#     node is read-only.
	# ~~~ HIERARCHY_REQUEST_ERR is also supposed to be raised if the
	#     node type does not allow children of $new_node's type.

	my($self,$new_node,$old_node) = @_;

	$self->is_inside($new_node) and
		die new HTML::DOM::Exception HIERARCHY_REQUEST_ERR,
		'A node cannot be inserted into one of its descendants';

	my $doc = $self->ownerDocument || $self;

	no warnings 'uninitialized';
	$self == $old_node->parent or
		die new HTML::DOM::Exception NOT_FOUND_ERR,
		'replaceChild\'s 2nd argument is not a child of this node';

	$old_node->trigger_event('DOMNodeRemoved',
		rel_node => $self);
	my $in_doc = $self->is_inside($doc);
	if($in_doc) {
		$_->trigger_event('DOMNodeRemovedFromDocument')
		  for $old_node, $old_node->descendants;
	}
	my $old_parent = $new_node->parent;
	$old_parent and $new_node->trigger_event('DOMNodeRemoved',
		rel_node => $old_parent);
	if($new_node->is_inside($doc) && !$new_node->is_inside($old_node)){
		$_->trigger_event('DOMNodeRemovedFromDocument')
		  for $new_node, $new_node->descendants;
	}

	# If the owner is not set explicitly inside the node, it will lose
	# its owner.  The ownerDocument method  sets  it  if  it  is  not
	# already set.
	$old_node->ownerDocument;

	my $ret = $old_node->replace_with(
		my @nodes
		 = $new_node->isa('HTML::DOM::DocumentFragment')
		 ? $new_node->childNodes
		 : $new_node
	);
	$_->_set_ownerDocument($doc) for @nodes;

	$new_node->trigger_event('DOMNodeInserted', rel_node => $self);
	if($in_doc) {
		for($new_node, $new_node->descendants) {
			if(my $sub = $doc->elem_handler(lc $_->tag)) {
				&$sub($doc,$_)
			}
			$_->trigger_event('DOMNodeInsertedIntoDocument')
		}
	}
	$_->trigger_event('DOMSubtreeModified')
	  for _nearest_common_parent($old_parent, $self);

	$doc->_modified;

	$ret;
}

sub removeChild {
	# ~~~ NO_MODIFICATION_ALLOWED_ERR is meant to be raised if the
	#     node is read-only.

	my($self,$child) = @_;

	no warnings 'uninitialized';
	$self == $child->parent or
		die new HTML::DOM::Exception NOT_FOUND_ERR,
		'removeChild\'s argument is not a child of this node';

	# If the owner is not set explicitly inside the node, it will lose
	# its owner.  The ownerDocument method  sets  it  if  it  is  not
	# already set.
	my $doc = $child->ownerDocument;

	$child->trigger_event('DOMNodeRemoved',
		rel_node => $self);
	if($child->is_inside($doc)) {
		$_->trigger_event('DOMNodeRemovedFromDocument')
		  for $child, $child->descendants;
	}

	$child->detach;

	$self->trigger_event('DOMSubtreeModified');

	{($self->ownerDocument||next)->_modified;}

	$child;
}

sub appendChild {
	# ~~~ NO_MODIFICATION_ALLOWED_ERR is meant to be raised if the
	#     node is read-only.
	# ~~~ HIERARCHY_REQUEST_ERR is also supposed to be raised if the
	#     node type does not allow children of $new_node's type.

	my($self,$new_node) = @_;

	$self->is_inside($new_node) and
		die new HTML::DOM::Exception HIERARCHY_REQUEST_ERR,
		'A node cannot be inserted into one of its descendants';

	my $doc = $self->ownerDocument || $self;

	my $old_parent = $new_node->parent;
	$old_parent and $new_node->trigger_event('DOMNodeRemoved',
		rel_node => $old_parent);
	my $was_inside_doc = $new_node->is_inside($doc);
	if($was_inside_doc) {
		$_->trigger_event('DOMNodeRemovedFromDocument')
		  for $new_node, $new_node->descendants;
	}

	$self->push_content(
	 my @nodes = $new_node->isa('HTML::DOM::DocumentFragment')
	             ? $new_node->childNodes
	             : $new_node
	);
	$_->_set_ownerDocument($doc) for @nodes;

	$new_node->trigger_event('DOMNodeInserted', rel_node => $self);
	if($self->is_inside($doc)) {
		for($new_node, $new_node->descendants) {
			if(
			 !$was_inside_doc
			 and my $sub = $doc->elem_handler(lc $_->tag)
			) {
				&$sub($doc,$_)
			}
			$_->trigger_event('DOMNodeInsertedIntoDocument')
		}
	}
	$_->trigger_event('DOMSubtreeModified')
	  for _nearest_common_parent($old_parent, $self);

	$doc->_modified;

	$new_node;
}

# This is used to determine who gets a DOMSubtreeModified event. Despite
# its name, it may choose one of the two nodes passed to it if one is the
# parent of the other. If neither of the nodes is in the same tree, they
# are both returned. The first arg may be undef, in which case the 2nd
# is returned.
sub _nearest_common_parent {
	my ($node1,$node2)=@_;
	!defined $node1 and return $node2;
	$node1->root != $node2->root and return $node1, $node2;
	my $addr1 = $node1->address;
	my $addr2 = $node2->address;
	while(substr $addr1, 0, length $addr2, ne $addr2 and
	      substr $addr2, 0, length $addr1, ne $addr1) {
		s/\.[^.]*\z// for $addr1, $addr2;
	}
	$node2->address(
	  length $addr1 < length $addr2 ? $addr1 : $addr2
	)
}

sub hasChildNodes {
	!!$_[0]->content_list
}

sub cloneNode {
	my($self,$deep) = @_;
	if($deep) {
		(my $clown = $self->clone)
		  ->_set_ownerDocument($self->ownerDocument);
		$clown;
	}
	else {
		# ~~~ Do I need to reweaken any attributes?
		bless +(my $clone = { %$self }), ref $self;
		$clone->_set_ownerDocument($self->ownerDocument);
		delete $clone->{$_} for qw/ _parent _content /;
		$clone
	}
}

sub normalize {
	my @pile = my $self = shift;
	while(@pile) {
		if($pile[0]{_tag} eq '~text') {
			if($pile[0]{text} eq '') {
				shift(@pile)->detach, next
			}
			_:{while((my $next = $pile[0]->nextSibling||next _)
			       ->{_tag} eq '~text') {
				$pile[0]{text}.=$next->{text};
				$next->detach;
			}}
			shift @pile;
		}
		else {
			unshift @pile, @{(shift@pile)->{'_content'}||[]};
		}
	}
	return
}

sub hasAttributes {
	(shift->attributes||return 0)->length
}

sub isSupported {
	my $self = shift;
	$HTML::DOM::Implementation::it->hasFeature(@_)
}

# ----------- EVENT STUFF ------------- #

sub trigger_event { # non-DOM method
	my ($n,$evnt) = (shift,shift);
	my $doc = $n->ownerDocument||$n;
	$n->SUPER::trigger_event(
		$evnt,
		default => $doc->default_event_handler,
		view => scalar $doc->defaultView,
		@_,
	);
}

sub as_text{
	(my $clone = shift->clone)->deobjectify_text;
	$clone->SUPER::as_text(@_);
}

sub as_HTML{
	(my $clone = shift->clone)->deobjectify_text;
	$clone->SUPER::as_HTML(@_);
}

sub push_content {
	my $self  = shift; 
	@_ or return $self;
	my $count = ()=$self->content_list;
	$self->SUPER::push_content(@_);
	my $ary = $self->{_content};
	ref and weaken $_->{_parent} for @$ary[$count-@$ary..-1];
	$self
}

sub unshift_content {
	my $self  = shift; 
	my $count = ()=$self->content_list;
	$self->SUPER::unshift_content(@_);
	my $ary = $self->{_content};
	ref and weaken $_->{_parent} for @$ary[0..$#$ary-$count];
	$self
}

sub splice_content {
	my($self,$start,$deleted) = (shift,@_); 
	my $orig_count = ()=$self->content_list;
	$self->SUPER::splice_content(@_);
	my $ary = $self->{_content};

	# orig_length - deleted_items + x = final_length,
	# where x is the number of items added (to be weakened), so
	# x = final_length - orig_length + deleted_items.
	# x needs to be adjusted so it is an ending offset, so we use
	# $#$ary instead of the final length (@$ary) and add $start
	ref and weaken $_->{_parent}
		for @$ary[$start..$#$ary-$orig_count+$deleted+$start];

	$self
}

sub clone {
	my $self = shift;
	my $clone = $self->SUPER::clone;
	for ($clone->content_list) {
		ref or next;
		weaken $_->{_parent};
	}
	$clone;
}

sub replace_with {
	my $self = shift;
	$self->SUPER::replace_with(@_);
	for(@_) {
		no warnings;
		ref and weaken $_->{_parent};
	}
	$self;
}


1;
__END__