/usr/local/CPAN/XML-Smart/XML/Smart/Parser.pm


#############################################################################
## Name:        Parser.pm
## Purpose:     XML::Smart::Parser
## Author:      Paul Kulchenko (paulclinger@yahoo.com)
## Modified by: Graciliano M. P.
## Created:     10/05/2003
## RCS-ID:      
## Copyright:   2000-2001 Paul Kulchenko
## Licence:     This program is free software; you can redistribute it and/or
##              modify it under the same terms as Perl itself
##
## This module is actualy XML::Parser::Lite (with some updates). It's here
## just for convenience.
##
## See original code at CPAN for full source and POD.
##
## This module will be used when XML::Parser is not installed.
#############################################################################

# ======================================================================
#
# Copyright (C) 2000-2001 Paul Kulchenko (paulclinger@yahoo.com)
# SOAP::Lite is free software; you can redistribute it
# and/or modify it under the same terms as Perl itself.
#
# $Id: Lite.pm,v 1.4 2001/10/15 21:25:05 paulk Exp $
#
# Changes: Graciliano M. P. <gm@virtuasites.com.br>
#
# ======================================================================

package XML::Smart::Parser ;

no warnings ;
use strict;
use vars qw($VERSION);
$VERSION = 1.2 ;

my(@parsed , @stack, $level) ;

 &compile();

sub new { 
  my $class = ($_[0] =~ /^[\w:]+$/) ? shift(@_) : __PACKAGE__ ;
  my $this = bless {} , $class ;

  my %args = @_ ;
  $this->setHandlers(%args) ;
  
  $this->{NOENTITY} = 1 ;

  return $this ;
}

sub setHandlers {
  my $this = shift ;
  my %args = @_;
    
  $this->{Init}  = $args{Init} || sub{} ;
  $this->{Start} = $args{Start} || sub{} ;
  $this->{Char}  = $args{Char} || sub{} ;
  $this->{End}   = $args{End} || sub{} ;
  $this->{Final} = $args{Final} || sub{} ;
  
  return 1 ;
}

sub regexp {
  my $patch = shift || '' ;
  my $package = __PACKAGE__ ;

  my $TextSE = "[^<]+";
  my $UntilHyphen = "[^-]*-";
  my $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
  my $CommentCE = "$Until2Hyphens>?";
  my $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
  my $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
  my $S = "[ \\n\\t\\r]+";
  my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
  my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
  my $Name = "(?:$NameStrt)(?:$NameChar)*";
  my $QuoteSE = "\"[^\"]*\"|'[^']*'";
  my $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
  my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
  my $S1 = "[\\n\\r\\t ]";
  my $UntilQMs = "[^?]*\\?+";
  my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
  my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
  my $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?";
  my $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:($CDATA_CE)(?{${package}::char_CDATA(\$2)}))?|DOCTYPE(?:$DocTypeCE)?";
  my $PI_CE = "$Name(?:$PI_Tail)?";

  my $EndTagCE = "($Name)(?{${package}::end(\$3)})(?:$S)?>";
  my $AttValSE = "\"([^<\"]*)\"|'([^<']*)'";
  my $ElemTagCE = "($Name)(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)(?{[\@{\$^R||[]},\$5=>defined\$6?\$6:\$7]}))*(?:$S)?(/)?>(?{${package}::start(\$4,\@{\$^R||[]})})(?{\${8} and ${package}::end(\$4)})";
  my $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";

  "(?:($TextSE)(?{${package}::char(\$1)}))$patch|$MarkupSPE";
}

sub compile {
  local $^W; 
  
  foreach (regexp(), regexp('??')) {
    eval qq{sub parse_re { use re "eval"; 1 while \$_[0] =~ m{$_}go }; 1} or die;
    last if eval { parse_re('<foo>bar</foo>'); 1 }
  };

  *compile = sub {};
}

sub parse {
  my $this = shift ;
  
  @parsed = () ;
  
  init();
  parse_re($_[0]);
  final();

  no strict qw(refs);
  
  my $final = pop(@parsed) ; pop(@parsed) ;

  for (my $i = 0 ; $i <= $#parsed ; $i+=2) {
    my $args = $parsed[$i+1] ;
    &{$this->{$parsed[$i]}}($this , (ref($args) ? @{$args} : $args) ) ;
  }

  @parsed = () ;

  return &{$this->{Final}}($this, @{$final}) ;
}

sub init {
  @stack = (); $level = 0;
  push(@parsed , 'Init' , [@_]) ;
  return ;
}

sub final {
  die "not properly closed tag '$stack[-1]'\n" if @stack;
  die "no element found\n" unless $level;
  push(@parsed , 'Final' , [@_]) ;
  return ;
} 

sub start {
  die "multiple roots, wrong element '$_[0]'\n" if $level++ && !@stack;
  push(@stack, $_[0]);
  push(@parsed , 'Start' , [@_]) ;
  return ;
}

sub char {
  push(@parsed , 'Char' , [@_]) , return if @stack;

  for (my $i=0; $i < length $_[0]; $i++) {
    die "junk '$_[0]' @{[$level ? 'after' : 'before']} XML element\n"
      if index("\n\r\t ", substr($_[0],$i,1)) < 0; # or should '< $[' be there
  }
  return ;
}

sub char_CDATA {
  &char( substr($_[0] , 0 , -3) ) ;
}

sub end {
  pop(@stack) eq $_[0] or die "mismatched tag '$_[0]'\n";
  push(@parsed , 'End' , [@_]) ;
  return ;
}

# ======================================================================

1;