/usr/local/CPAN/perlSGML.1997Sep/SGML/SOCat.pm
##---------------------------------------------------------------------------##
## File:
## @(#) SOCat.pm 1.10 97/09/15 14:58:23 @(#)
## Author:
## Earl Hood ehood@medusa.acs.uci.edu
## Description:
## This file defines the SGML::SOCat class. POD documentation
## at the end of this file.
##---------------------------------------------------------------------------##
## Copyright (C) 1996,1997 Earl Hood, ehood@medusa.acs.uci.edu
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public
## License along with this program; if not, write to the Free
## Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
## MA 02111-1307, USA.
##---------------------------------------------------------------------------##
package SGML::SOCat;
use vars qw(@ISA @EXPORT $VERSION
$MaxErrs
$com $lit $lit_ $lita $lita_ $quotes
$_hcnt %_name_sysid_entries %_scalar_entries
%Entries);
use Exporter ();
@ISA = qw( Exporter );
@EXPORT = ();
$VERSION = "0.02";
$MaxErrs= 10; # Max number of errors before aborting
$com = q/--/; # Comment delimiter
$lit = q/"/; # Literal delimiter
$lit_ = q/"/;
$lita = q/'/; # Literal (alternate) delimiter
$lita_ = q/'/;
$quotes = q/"'/; # All literal delimiters
$_hcnt = 0; # Filehandle count
%_name_sysid_entries = (
'DELEGATE' => { argc => 2, argt => [1,0] },
'DOCTYPE' => { argc => 2, argt => [0,0] },
'DTDDECL' => { argc => 2, argt => [1,0] },
'ENTITY%' => { argc => 2, argt => [0,0] },
'ENTITY' => { argc => 2, argt => [0,0] },
'LINKTYPE' => { argc => 2, argt => [0,0] },
'NOTATION' => { argc => 2, argt => [0,0] },
'PUBLIC' => { argc => 2, argt => [1,0] },
'SYSTEM' => { argc => 2, argt => [0,0] },
);
%_scalar_entries = (
'DOCUMENT' => { argc => 1, argt => [0] },
'SGMLDECL' => { argc => 1, argt => [0] },
);
%Entries = ( # Legal catalog entries
%_name_sysid_entries,
%_scalar_entries,
'BASE' => { argc => 1, argt => [0] },
'CATALOG' => { argc => 1, argt => [0] },
'OVERRIDE' => { argc => 1, argt => [0] },
);
##**********************************************************************##
## PUBLIC METHODS
##**********************************************************************##
##----------------------------------------------------------------------
## new() constructor.
##
sub new {
my $this = { };
my $class = shift;
my $file = shift;
bless $this, $class;
$this->_reset();
my $stat = 1;
if (ref($file)) { # if reference, assume reference to filehandle
my $name = shift;
$stat = $this->read_handle($file, $name);
} elsif (defined($file)) {
$stat = $this->read_file($file);
}
$stat ? $this : undef;
}
##----------------------------------------------------------------------
## read_file() reads the catalog designated by the filename
## passed in. A 1 is returned on success, and a 0 on failure.
##
sub read_file {
my $this = shift;
my $fname = shift;
my $handle = "CAT" . $_hcnt++;
if (open($handle, $fname)) {
$this->read_handle(\*$handle, $fname);
} else {
$this->_errMsg(qq{Unable to open "$fname"});
}
}
##----------------------------------------------------------------------
## read_handle() reads the catalog designated by the filehandle
## passed in. A 1 is returned on success, and a 0 on failure.
## A reference to a filehandle should passed in to avoid problems
## with package scoping.
##
sub read_handle {
my $this = shift;
my ($fh, $fname) = @_;
## Push file data onto stack
push(@{$this->{_File}}, { _FH => $fh,
_filename => $fname,
_buf => undef,
_line_num => 0,
_override => 0,
_base => '',
_errcnt => 0,
_peek => [ ],
} );
## We use an eval block to capture die's
eval {
my($token, $islit, $i, $tmp, $override, $base);
my(@args);
my $fref = $this->{_File}[$#{$this->{_File}}];
ENTRY: while (1) {
## Get next entry token
($token, $islit) = $this->_get_next_token();
last ENTRY unless defined($token);
## Check if literal
if ($islit) {
$this->_errMsg("Line ", $fref->{_line_num},
": Spurious literal '$token'");
next ENTRY;
}
## Check if entry is recognized
$token =~ tr/a-z/A-Z/;
if (!$Entries{$token}) {
$this->_errMsg("Line ", $fref->{_line_num},
": Unrecognized entry '$token'");
## Skip passed any arguments to unrecognized entry
while (1) {
($token, $islit) = $this->_get_next_token(1);
last ENTRY unless defined($token);
last unless $islit;
$this->_get_next_token();
}
next ENTRY;
}
## Have known entry ##
## Get arguments for entry
@args = ();
for ($i = 0; $i < $Entries{$token}{argc}; $i++) {
if (!defined($tmp = ($this->_get_next_token())[0])) {
$this->_errMsg("Unexpected EOF");
last ENTRY;
}
## Compress whitespace if required
if ($Entries{$token}{argt}[$i]) {
$tmp =~ s/\s+/ /g;
}
push(@args, $tmp);
}
## Store entry information
$override = $fref->{_override};
$base = $fref->{_base};
SW: {
if (defined($_name_sysid_entries{$token})) {
$tmp = ($args[0] =~ '%') ? 'ENTITY%' : $token;
# Only store entry if not already defined
if (!defined($this->{$tmp}{$args[0]})) {
$this->{$tmp}{$args[0]} = {
sysid => $args[1],
override => $override,
base => $base,
};
# Check if DELEGATE and store size of pubid
# prefix
if ($tmp eq 'DELEGATE') {
push(@{$this->{_DelSizes}{length($args[0])}},
$args[0]);
}
}
last SW;
}
if (defined($_scalar_entries{$token})) {
$this->{$token} = {
sysid => $args[0],
base => $base,
} unless defined $this->{$token};
last SW;
}
if ($token eq 'BASE') {
$fref->{_base} = $args[0];
last SW;
}
if ($token eq 'OVERRIDE') {
$fref->{_override} = ($args[0] =~ /yes/i) ? 1 : 0;
last SW;
}
if ($token eq 'CATALOG') {
$this->read_file($args[0]);
last SW;
}
$this->_errMsg("Internal Error\n");
} # End SW
} # End ENTRY
}; # End eval
if ($@) { warn $@; }
## Pop file data off stack
pop(@{$this->{_File}});
## Return status
$@ ? 0 : 1;
}
##----------------------------------------------------------------------
## get_public() retrieves the sysid public identifier.
##
## Usage:
##
## Scalar context: Check if pubid has an entry
## if ($cat->get_public($pubid)) {
## ...
## }
## Array context: Retrieve sysid of pubid
## ($sysid, $base, $override) = $cat->get_public($pubid);
##
sub get_public {
my $this = shift;
my $pubid = shift;
$pubid =~ s/\s+/ /g;
wantarray
?
($this->{PUBLIC}{$pubid}{sysid},
$this->{PUBLIC}{$pubid}{base},
$this->{PUBLIC}{$pubid}{override})
:
defined($this->{PUBLIC}{$pubid});
}
##----------------------------------------------------------------------
## get_gen_ent() retrieves the sysid general entity name.
##
## Usage:
## Scalar context: Check if general entity name has an entry
## if ($cat->get_gen_ent($name)) {
## ...
## }
## Array context: Retrieve sysid of general entity
## ($sysid, $base, $override) = $cat->get_gen_ent($name);
##
sub get_gen_ent {
my $this = shift;
my $name = shift;
wantarray ?
($this->{ENTITY}{$name}{sysid},
$this->{ENTITY}{$name}{base},
$this->{ENTITY}{$name}{override})
:
defined($this->{ENTITY}{$name});
}
##----------------------------------------------------------------------
## get_parm_ent() retrieves the sysid for parameter entity name.
##
## Usage:
## Scalar context: Check if parameter entity name has an entry
## if ($cat->get_parm_ent($name)) {
## ...
## }
## Array context: Retrieve sysid of parameter entity
## ($sysid, $base, $override) = $cat->get_parm_ent($name);
##
sub get_parm_ent {
my $this = shift;
my $name = shift;
wantarray
?
($this->{'ENTITY%'}{$name}{sysid},
$this->{'ENTITY%'}{$name}{base},
$this->{'ENTITY%'}{$name}{override})
:
defined($this->{'ENTITY%'}{$name});
}
##----------------------------------------------------------------------
## get_doctype() retrieves the sysid for the entity denoted
## by document type name.
##
## Usage:
## Scalar context: Check if doctype has an entry
## if ($cat->get_doctype($name)) {
## ...
## }
## Array context: Retrieve sysid of doctype external subset
## ($sysid, $base, $override) = $cat->get_doctype($name);
##
sub get_doctype {
my $this = shift;
my $name = shift;
wantarray
?
($this->{DOCTYPE}{$name}{sysid},
$this->{DOCTYPE}{$name}{base},
$this->{DOCTYPE}{$name}{override})
:
defined($this->{DOCTYPE}{$name});
}
##----------------------------------------------------------------------
## get_linktype() retrieves the sysid for the entity denoted
## by link type name.
##
## Usage:
## Scalar context: Check if linktype name has an entry
## if ($cat->get_linktype($name)) {
## ...
## }
## Array context: Retrieve sysid of linktype nsmr
## ($sysid, $base, $override) = $cat->get_linktype($name);
##
sub get_linktype {
my $this = shift;
my $name = shift;
wantarray
?
($this->{LINKTYPE}{$name}{sysid},
$this->{LINKTYPE}{$name}{base},
$this->{LINKTYPE}{$name}{override})
:
defined($this->{LINKTYPE}{$name});
}
##----------------------------------------------------------------------
## get_notation() retrieves the sysid for the entity denoted
## by notation name.
##
## Usage:
## Scalar context: Check if notation has an entry
## if ($cat->get_notation($name)) {
## ...
## }
## Array context: Retrieve sysid of notation
## ($sysid, $base, $override) = $cat->get_notation($name);
##
sub get_notation {
my $this = shift;
my $name = shift;
wantarray ?
($this->{NOTATION}{$name}{sysid},
$this->{NOTATION}{$name}{base},
$this->{NOTATION}{$name}{override})
:
defined($this->{NOTATION}{$name});
}
##----------------------------------------------------------------------
## get_system() retrieves the sysid for the entity denoted
## by a system id.
##
## Usage:
## Scalar context: Check if sysid has an entry
## if ($cat->get_system($esysid)) {
## ...
## }
## Array context: Retrieve sysid of a sysid
## ($sysid, $base, $override) = $cat->get_system($esysid);
##
sub get_system {
my $this = shift;
my $sysid = shift;
wantarray
?
($this->{SYSTEM}{$sysid}{sysid},
$this->{SYSTEM}{$sysid}{base},
$this->{SYSTEM}{$sysid}{override})
:
defined($this->{SYSTEM}{$sysid});
}
##----------------------------------------------------------------------
## get_sgmldecl() retrieves the sysid for the SGML declaration.
##
## Usage:
## Scalar context: Check if sgmldecl entry defined
## if ($cat->get_sgmldecl()) {
## ...
## }
## Array context: Retrieve sysid of sgmldecl
## ($sysid, $base) = $cat->get_sgmldecl();
##
sub get_sgmldecl {
my $this = shift;
wantarray
?
($this->{SGMLDECL}{sysid},
$this->{SGMLDECL}{base})
:
defined($this->{SGMLDECL});
}
##----------------------------------------------------------------------
## get_dtddecl() retrieves the sysid for the SGML declaration
## associated with a doctype external subset pubid.
##
## Usage:
## Scalar context: Check if dtddecl defined
## if ($cat->get_dtddecl($pubid)) {
## ...
## }
## Array context: Retrieve sysid of dtddecl
## ($sysid, $base) = $cat->get_dtddecl($pubid);
##
sub get_dtddecl {
my $this = shift;
my $pubid = shift;
$pubid =~ s/\s+/ /g;
wantarray
?
($this->{DTDDECL}{$pubid}{sysid},
$this->{DTDDECL}{$pubid}{base})
:
defined($this->{DTDDECL}{$pubid});
}
##----------------------------------------------------------------------
## get_document() retrieves the sysid for the document entity.
##
## Usage:
## Scalar context: Check if document entity defined
## if ($cat->get_document()) {
## ...
## }
## Array context: Retrieve sysid of document entity
## ($sysid, $base) = $cat->get_document();
##
sub get_document {
my $this = shift;
wantarray
?
($this->{DOCUMENT}{sysid},
$this->{DOCUMENT}{base})
:
($this->{DOCUMENT});
}
##----------------------------------------------------------------------
## get_delegate() checks a pubid to see if a pubid-prefix has
## been defined that matches the pubid. If so, then a sysid of
## a catalog is returned. The catalog should be used to resolve
## pubids that match the prefix.
##
## Usage:
## Scalar context: Check if pubid has a prefix entry
## if ($cat->get_delegate($pubid)) {
## ...
## }
## Array context: Retrieve sysid of catalog for pubid
## ($sysid, $base) = $cat->get_delegate($pubid);
##
sub get_delegate {
my $this = shift;
my $in_pubid = shift;
my($len, @pubpres);
my $pubpre = '';
$in_pubid =~ s/\s+/ /g;
## Sort prefixes by size with largest first
@pubpres = sort { $b <=> $a } keys %{$this->{_DelSizes}};
## Check if there is a pubid prefix for in_pubid
OUTER: foreach $len (@pubpres) {
INNER: foreach (@{$this->{_DelSizes}{$len}}) {
if ($in_pubid =~ /^$_/) {
$pubpre = $_;
last OUTER;
}
}
}
wantarray
?
($pubpre ? ($this->{DELEGATE}{$pubpre}{sysid},
$this->{DELEGATE}{$pubpre}{base}) : ())
:
$pubpre ? 1 : 0;
}
##**********************************************************************##
## PRIVATE METHODS
##**********************************************************************##
##----------------------------------------------------------------------
## _reset() initializes the data structures for SOCat.
##
sub _reset {
my $this = shift;
$this->{_File} = [ ];
}
##----------------------------------------------------------------------
## _errMsg() prints a formatted error message. The passed
## in message is prefixed by the class name and the filename
## of the file at the top of the stack.
##
sub _errMsg {
my $this = shift;
my $fref = $this->{_File}[$#{$this->{_File}}];
my $prfx = join('', ref($this), ":", $fref->{_filename}, ":");
## Output message
warn $prfx, @_, "\n";
## Check if error count over maximum
my $errcnt = ++$fref->{_errcnt};
if ($errcnt >= $MaxErrs) {
die $prfx, "Parsing aborted; too many errors ($errcnt)\n";
}
}
##----------------------------------------------------------------------
## _get_next_token() grabs the next token from the file at
## top of the stack. If a non-zero argument is passed in,
## the function will return next token but keep it in the
## input. This allows one to peek at the next token.
##
sub _get_next_token {
my $this = shift;
my $peeking = shift;
my $token = undef;
my $islit = 0;
my $fref = $this->{_File}[$#{$this->{_File}}];
## Check if token cached from previous peek
if (@{$fref->{_peek}}) {
($token, $islit) = @{$fref->{_peek}};
$fref->{_peek} = [ ] unless $peeking;
}
return ($token, $islit) if defined($token);
## Do some aliasing to make things easier
local(*buf) = \$fref->{_buf};
local(*fh) = \$fref->{_FH};
local(*line_num) = \$fref->{_line_num};
## Get next token from filehandle
GETTOKEN: while (1) {
## Load buffer if empty
while (!$buf or $buf !~ /\S/) {
last GETTOKEN unless $buf = <$fh>;
$line_num = $.;
}
## Remove any leading spaces from buffer
$buf =~ s/^\s+//;
## Check for comment
if ($buf =~ s/^$com//o) {
while (1) {
if ($buf =~ /$com/o) {
$buf = $';
last;
}
if (not $buf = <$fh>) {
$this->_errMsg("Line $.: ",
"Unclosed comment at EOF ",
"(comment start: line $line_num)");
last GETTOKEN;
}
}
$line_num = $.;
next GETTOKEN;
}
## Literal Token
if ($buf =~ s/^([$quotes])//o) {
my $q = $1;
$islit = 1;
$token = '';
while (1) {
if (($q eq $lit_) ? ($buf =~ /$lit/o) :
($buf =~ /$lita/o)) {
$token .= $`;
$buf = $';
last;
}
$token .= $buf;
if (not $buf = <$fh>) {
$this->_errMsg("Line $.: ",
"Unclosed literal at EOF ",
"(literal start: line $line_num)");
$line_num = $.;
last GETTOKEN;
}
}
last GETTOKEN;
}
# Name token
$buf =~ s/(\S+)\s*//;
$token = $1;
last GETTOKEN;
}
## Save token if peeking
@{$fref->{_peek}} = ($token, $islit) if $peeking;
## Return token
($token, $islit);
}
##----------------------------------------------------------------------
1;