/usr/local/CPAN/Text-Restructured/Text/Restructured/DOM.pm


# $Id: DOM.pm 6235 2010-03-01 20:23:51Z mnodine $
# Copyright (C) 2002-2005 Freescale Semiconductor, Inc.
# Distributed under terms of the Perl license, which is the disjunction of
# the GNU General Public License (GPL) and the Artistic License.

package Text::Restructured::DOM;

($VERSION) = q$Revision: 6235 $ =~ /(\d+)/g;

# This package contains routines for Document Object Model (DOM) objects.
# A DOM object is the prest equivalent of a doctree object.

# Data structures:
#   _`Text::Restructured::DOM`: Recursive hash reference with following 
#     keys:
#       ``tag``:      The name of the tag of the DOM object
#       ``attr``:     Reference to hash of attribute/value pairs
#       ``content``:  Reference to array of DOM objects
#       ``text``:     Contains the literal text for #PCDATA
#       ``internal``: Reference to hash of internal attribute/value pairs
#       ``source``:   Optionally contains the source
#       ``lineno``:   Optionally contains the line number
#       ``lit``:      Optionally contains the literal text
#       ``val``:      The value returned by the DOM's handler (added 
#                     during traversal of the writer's handlers)

# Global variables:
#   ``%DOM::PARENT``: hash whose keys are DOM references and whose values are
#                     a reference to the DOM object of the parent.
#                     Should only be accessed indirectly through the 
#                     ``DOM::parent`` method.

use strict;
use vars qw(%PARENT);

# CLASS METHOD.
# Creates a new DOM object.
# Arguments: (optional) tag, (optional) list of attribute/value pairs
# Returns: DOM object
sub new {
    my ($class, $tag, %attr) = @_;

    my $dom = bless { }, $class;
    $dom->{tag} = $tag if defined $tag;
    $dom->{attr} = {%attr} if %attr;
    $dom->{content} = [];
    return $dom;
}

# CLASS METHOD.
# Creates a new DOM object that is a "#PCDATA" type.
# Arguments: text
# Returns: DOM object
sub newPCDATA {
    my ($class, $text) = @_;

    return bless {tag=>'#PCDATA', text=>$text, content=>[] };
}

# INSTANCE METHOD.
# Appends to the contents of a DOM object.
# Arguments: DOM objects to append
# Returns: The DOM object
sub append : method {
    my ($dom, @doms) = @_;

    @PARENT{@doms} = ($dom) x @doms;
    push @{$dom->{content}}, @doms;
    return $dom;
}

# INSTANCE METHOD.
# Returns the child with index n in the contents (0-based)
# Arguments: n
# Returns: child DOM object or undef
sub child : method {
    my ($dom, $n) = @_;
    return $dom->{content}[$n];
}

# INSTANCE METHOD.
# Returns the content objects the DOM object has
# Arguments: None
# Returns: Array of content DOM objects
sub contents : method {
    my ($dom) = @_;

    return @{$dom->{content}};
}

# INSTANCE METHOD.
# Returns the first DOM in the contents of a DOM.
# Arguments: None
# Returns: first DOM object (or undefined)
sub first : method {
    my ($dom) = @_;

    my $first;
    if (@{$dom->{content}}) {
	$first = $dom->{content}[0];
    }
    return $first;
}

# INSTANCE METHOD.
# Returns the index of a child in the contents (-1 if it does not occur).
# Arguments: child DOM object
# Returns: index number
sub index : method {
    my ($dom, $child) = @_;
    my $i;
    for ($i=0; $i<@{$dom->{content}}; $i++) {
	return $i if $dom->{content}[$i] == $child;
    }
    return -1;
}

# INSTANCE METHOD.
# Returns the last DOM in the contents of a DOM.
# Arguments: None
# Returns: last DOM object (or undefined)
sub last : method {
    my ($dom) = @_;

    my $last;
    if (@{$dom->{content}}) {
	$last = $dom->{content}[-1];
    }
    return $last;
}

# INSTANCE METHOD.
# Returns the next DOM in the logical structure of the tree.  If the
# given DOM is the last in a section or list, this routine may have to
# go up in the tree to find the next object.
# Arguments: optional regular expression for tags to ignore
# Returns: next DOM or undef
sub next : method {
    my ($dom, $ignore) = @_;

    my $parent = $dom->parent();
    my $indx = $parent->index($dom) + 1;
    my $cur_parent = $parent;
    while (defined $cur_parent) {
	while ($indx < $cur_parent->num_contents()) {
	    my $tag = $cur_parent->{content}[$indx]{tag};
	    if (defined $ignore && $tag =~ /^(?:$ignore)$/) {
		# It's a skippable tag
		$indx++;
		next;
	    }
	    return $cur_parent->{content}[$indx];
	}
	my $new_parent = $cur_parent->parent();
	return unless defined $new_parent;
	$indx = $new_parent->index($cur_parent) + 1;
	$cur_parent = $new_parent;
    }
}

# INSTANCE METHOD.
# Returns the number of content objects the DOM object has
# Arguments: None
# Returns: Number of elements
sub num_contents : method {
    my ($dom, @doms) = @_;

    return 0+@{$dom->{content}};
}

# INSTANCE METHOD.
# Returns the parent DOM of an instance.
# Arguments: None
# Returns: The DOM object's parent
sub parent : method {
    my ($dom) = @_;

    return $PARENT{$dom};
}

# INSTANCE METHOD.
# Puts the arguments at the beginning of the contents of a DOM object.
# Arguments: DOM objects to prepend
# Returns: The new number of objects
sub prepend : method {
    my ($dom, @doms) = @_;

    @PARENT{@doms} = ($dom) x @doms;
    unshift (@{$dom->{content}}, @doms);
}

# INSTANCE METHOD.
# Goes through a DOM object recursively calling a subroutine on every
# element.  It can do either preorder, postorder or bothorder traversal
# (defaults to postorder).  Unlike Reshape, it does not modify the
# children of the nodes it visits.
# Arguments: callback routine, optional 'pre'/'post'/'both',
#            optional additional arguments to be propagated
# Returns: Stop recursion flag
# Callback routine arguments: target DOM, 'pre'/'post',
#                             optional additional arguments
# Callback routine returns: non-zero in 'pre' mode to avoid further recursion.
sub Recurse : method {
    my($dom, $sub, $when, @args) = @_;

    $when = 'post' unless defined $when;
    my $stop;
    if ($when =~ /^(pre|both)$/) {
	$stop = eval { &{$sub}($dom, 'pre', @args) };
	die "Error: $sub: $@" if $@;
    }
    return if $stop;

    my @contents = @{$dom->{content}};
    my $i;
    for ($i=0; $i<@contents; $i++) {
	my $content = $contents[$i];
	$content->Recurse($sub, $when, @args);
    }

    if ($when ne 'pre') {
	eval { &{$sub}($dom, 'post', @args) };
	die "Error: $sub: $@" if $@;
    }
}

# INSTANCE METHOD.
# Replaces the contents of a DOM object with a new set of objects.
# Arguments: DOM objects to replace
# Returns: None
sub replace : method {
    my ($dom, @doms) = @_;

    @PARENT{@doms} = ($dom) x @doms;
    @{$dom->{content}} = @doms;
    return;
}

# INSTANCE METHOD.
# Goes through a DOM object recursively calling a subroutine on every
# element.  It can do either preorder, postorder or bothorder traversal
# (defaults to postorder).
# Arguments: callback routine, optional 'pre'/'post'/'both',
#            optional additional arguments to be propagated
# Returns: Reference to new set objects to replace the current object
# Callback routine arguments: target DOM, 'pre'/'post',
#                             optional additional arguments
# Callback routine returns: whatever list of DOM objects are to be 
#                           substituted for the current node (this
#                           list is returned on the 'post' call if
#                           'both' is selected).
sub Reshape : method {
    my($dom, $sub, $when, @args) = @_;

    $when = 'post' unless defined $when;
    my @newdom;
    if ($when =~ /^(pre|both)$/) {
	@newdom = eval { &{$sub}($dom, 'pre', @args) };
	die "Error: $sub: $@" if $@;
    }

    my @contents = @{$dom->{content}};
    my $i;
    my $replace = 0;
    for ($i=0; $i<@contents; $i++) {
	my $content = $contents[$i];
	my @new_contents = grep(defined $_,
				$content->Reshape($sub, $when, @args));
	$dom->splice($replace, 1, @new_contents);
	$replace += @new_contents;
    }

    if ($when ne 'pre') {
	@newdom = eval { &{$sub}($dom, 'post', @args) };
	die "Error: $sub: $@" if $@;
    }

    return @newdom;
}

# INSTANCE METHOD.
# Splices objects into the contents of a DOM object.
# Arguments: start index, number to replace, list of DOM objects to splice
# Returns: Array of removed objects
sub splice : method {
    my ($dom, $index, $n, @doms) = @_;

    @PARENT{@doms} = ($dom) x @doms;
    return splice(@{$dom->{content}}, $index, $n, @doms);
}

# INSTANCE METHOD.
# Substitutes a different set of DOM objects for a given DOM object in the
# contents of its parent.
# Arguments: list of DOM objects
# Returns: None
sub substitute : method {
    my($dom, @new_doms) = @_;

    my $parent = $dom->parent;
    return unless $parent;
    my $index = $parent->index($dom);
    return if $index < 0;
    splice @{$parent->{content}}, $index, 1, @new_doms;
    delete $PARENT{$dom};
    @PARENT{@new_doms} = ($parent) x @new_doms;
}

# INSTANCE METHOD.
# Returns the tag of a DOM object
# Arguments: Optional new tag value
# Returns: Tag
sub tag : method {
    my($dom, $new_tag) = @_;

    $dom->{tag} = $new_tag if defined $new_tag;
    return $dom->{tag};
}

# Parses text that is in DOM (pseudo-XML) format.
# Arguments: Text, reference to hash of command-line options
# Returns: DOM object
# Uses globals: None
sub Parse {
    my ($text, $opt) = @_;
    my $last_indent = -1;
    my @stack;
    my @indents;
    my $tos;	# top of stack
    my $main;
    my @text = split /\n/, $text;
    foreach (@text) {
	my ($spaces) = /^(\s*)/;
	my $indent = length($spaces);
	if (@stack > 0) {
	    my $i;
	    for ($i=0; $i < @indents; $i++) {
		last if $indent <= $indents[$i]+1;
	    }
	    splice(@stack, $i);
	    splice(@indents, $i);
	    $tos = $stack[-1];
	}

	if (/^(\s*)<(\w+)\s*([^>]*)>\s*$/) {
	    my ($spaces, $tag, $attrlist) = ($1, $2, $3);
	    my $dom = new Text::Restructured::DOM($tag);
	    while ($attrlist ne '') {
		if ($attrlist =~ s/^([\w:]+)=([\"\'])([^\"]*)\2\s*//) {
		    $dom->{attr}{$1} = $3;
		}
		elsif ($attrlist =~ s/^(\w+)\s*//) {
		    $dom->{attr}{$1} = undef;
		}
		else {
		    goto pcdata;
		}
	    }
	    $tos->append($dom) if $tos;
	    if (@stack > 0) {
		$tos = $dom;
	    }
	    else {
		$main = $dom;
	    }
	    push (@stack, $dom);
	    push (@indents, $indent);
	    $tos = $dom;
	}
	else {
	  pcdata:
	    substr($_,0,$indents[-1]+4) = "";
	    chomp;
	    my $text = $_;
	    my $ncontent = @{$tos->{content}};
	    if ($ncontent > 0 &&
		$tos->{content}[$ncontent-1]{tag} eq '#PCDATA') {
		$tos->{content}[$ncontent-1]{text} .= "$text\n";
	    }
	    else {
		my $dom = newPCDATA Text::Restructured::DOM("$text\n");
		$tos->append($dom);
	    }
	}
    };

    $main->{attr}{source} = $opt->{D}{source} || $ARGV;
    return $main;
}

# Methods relating to the DTD

BEGIN {
# These are computed from the docutils.dtd using XML::Smart::DTD
my @takes_body_elts =
    qw(admonition attention block_quote caution citation compound
       container danger definition description document error
       field_body footer footnote header hint important legend
       list_item note section sidebar system_message tip topic
       warning);
my @takes_inline_elts =
    qw(abbreviation acronym address attribution author caption
       classifier contact copyright date doctest_block emphasis
       field_name generated inline line literal_block organization
       paragraph problematic raw reference revision rubric status
       strong subscript substitution_definition substitution_reference
       subtitle superscript target term title title_reference
       version);
my @is_body_elt =
    qw(admonition attention block_quote bullet_list caution citation
       comment compound container danger definition_list doctest_block
       enumerated_list error field_list figure footnote hint image
       important line_block literal_block note option_list paragraph
       pending raw reference rubric substitution_definition
       system_message table target tip warning);
my @is_inline_elt =
    qw(emphasis strong literal reference footnote_reference
       citation_reference substitution_reference title_reference
       abbreviation acronym subscript superscript inline problematic
       generated target image raw);
my (%takes_body_elts, %takes_inline_elts, %is_body_elt, %is_inline_elt);
@takes_body_elts{@takes_body_elts}     = (1) x @takes_body_elts;
@takes_inline_elts{@takes_inline_elts} = (1) x @takes_inline_elts;
@is_body_elt{@is_body_elt}             = (1) x @is_body_elt;
@is_inline_elt{@is_inline_elt}         = (1) x @is_inline_elt;

# INSTANCE METHOD.
# Arguments: None
# Returns: True if the DOM object can take body elements in its contents
sub takes_body_elts : method {
    my ($dom) = @_;
    return $takes_body_elts{$dom->tag} || 0;
}

# INSTANCE METHOD.
# Arguments: None
# Returns: True if the DOM object can take inline elements in its contents
sub takes_inline_elts : method {
    my ($dom) = @_;
    return $takes_inline_elts{$dom->tag} || 0;
}

# INSTANCE METHOD.
# Arguments: None
# Returns: True if the DOM object is a body element
sub is_body_elt : method {
    my ($dom) = @_;
    return $is_body_elt{$dom->tag} || 0;
}

# INSTANCE METHOD.
# Arguments: None
# Returns: True if the DOM object is an inline element
sub is_inline_elt : method {
    my ($dom) = @_;
    return $is_inline_elt{$dom->tag} || 0;
}

}

1;