# /usr/local/CPAN/Combine/Combine/CleanXML2CanDoc.pm


package Combine::CleanXML2CanDoc;

$VERSION = '0.1';

#############################################################################
#
#  A "clean XML" (in UTF8) -> canonicalDocument converter class
#
#      Options:
#  
#            'indentation'           The number of ' ' chars * L inserted
#                                    at the beginning of the line at 
#                                    <canonicalDocument> tree level L.
#
#     Kimmo Valtonen 
#
#############################################################################

use Alvis::Canonical;

#
# Return codes
#

# Package-level status codes; convert() hands one of them back as the
# first element of its result list.
$OK                = 0;    # conversion succeeded
$CAN_EXT_FAILED    = 1;    # extracting the canonical text failed
$VALIDATION_FAILED = 2;    # NOTE(review): not referenced in the visible code

#
# Default values for parameters
#
my $DEFAULT_INDENTATION = 2;    # spaces prepended to each line by convert()

#
# Constructor.
#
#   Arguments: optional list of key => value option pairs; they are handed
#              to _init(), which stores them as fields of the object
#              (e.g. indentation => 4).
#
#   Returns:   the new object, or undef (after warn()ing with the value of
#              Alvis::Canonical::errmsg()) if Alvis::Canonical->new()
#              returned an undefined value.
#
sub new
{
    my $proto=shift;

    # Works both as Class->new(...) and as $object->new(...)
    my $class=ref($proto)||$proto;
    my $self={};
    bless($self,$class);

    $self->_init(@_);

    # Assigned after _init(), so a caller-supplied 'converter' option is
    # always replaced by a freshly created converter.
    $self->{converter}=Alvis::Canonical->new(cleanChars=>1);
    if (!defined($self->{converter}))
    {
        warn Alvis::Canonical::errmsg();
        # Kept as an explicit undef (not a bare return) so the result seen
        # by existing callers is unchanged in every calling context.
        return undef;
    }

    return $self;
}

#
# Initialise the option fields of a freshly blessed object.
#
#   Arguments: optional list of key => value pairs. Every pair is stored
#              verbatim as a field of the object, so it overrides the
#              default set here for the same key.
#
#   Defaults:  indentation => $DEFAULT_INDENTATION
#
sub _init
{
    my $self=shift;

    $self->{indentation}=$DEFAULT_INDENTATION;

    # defined(@array) was deprecated and is a fatal compile-time error since
    # Perl 5.22; testing the array itself is the supported way to ask
    # "were any arguments given?".
    if (@_)
    {
        my %args=@_;
        @$self{ keys %args } = values( %args );
    }
}

#
# Convert a "clean XML" document to a <canonicalDocument> element.
#
#   Arguments: $xml - the document text; passed on unchanged to the HTML()
#                     method of the converter stored in the object.
#
#   Returns:   a three-element list (status, document, error message):
#                ($OK, $canonical, "")              on success
#                ($CAN_EXT_FAILED, undef, $message) if the first value
#                                                   returned by HTML() is
#                                                   undefined
#
sub convert
{
    my $self=shift;
    my $xml=shift;

    #
    # Extract a canonical version
    #
    # HTML() is still called in list context; only its first return value
    # is used here.
    my ($txt)=$self->{converter}->HTML($xml);
    if (!defined($txt))
    {
        # Lexical, so the message does not linger in a package global
        my $err=Alvis::Canonical::errmsg();
        return ($CAN_EXT_FAILED,undef,$err);
    }

    # Prefix every line of the extracted text with 'indentation' spaces
    my $ind=" " x $self->{indentation};
    $txt=~s/^/$ind/mg;

    my $canonical="<canonicalDocument>${txt}</canonicalDocument>";

    return ($OK,$canonical,"");
}

1;