/usr/local/CPAN/SGML-DTDParse/SGML/DTDParse/Catalog.pm
# -*- Perl -*-
package SGML::DTDParse::Catalog;
use strict;
use vars qw($VERSION $CVS);
$VERSION = do { my @r=(q$Revision: 2.1 $=~/\d+/g); sprintf "%d."."%03d"x$#r,@r };
$CVS = '$Id: Catalog.pm,v 2.1 2005/07/02 23:51:18 ehood Exp $ ';
sub new {
my $type = shift;
my %param = @_;
my $class = ref($type) || $type;
my $self = bless {}, $class;
$self->{'DIRECTIVE'} = [];
$self->{'FILES'} = {};
$self->{'VERBOSE'} = $param{'Verbose'} || $param{'Debug'};
$self->{'DEBUG'} = $param{'Debug'};
my $catfiles = $main::ENV{'SGML_CATALOG_FILES'};
my @files = ();
if ($catfiles =~ /;/) {
@files = split(/;/, $catfiles);
} else {
@files = split(/:/, $catfiles);
}
foreach my $file (@files) {
$self->parse($file);
}
return $self;
}
sub verbose {
my $self = shift;
my $val = shift;
my $verb = $self->{'VERBOSE'};
$self->{'VERBOSE'} = $val if defined($val);
return $verb;
}
sub debug {
my $self = shift;
my $val = shift;
my $dbg = $self->{'DEBUG'};
$self->{'DEBUG'} = $val if defined($val);
return $dbg;
}
sub parse {
my $self = shift;
my $file = shift;
return 2 if $self->{'FILES'}->{$file};
$self->{'FILES'}->{$file} = 1;
return $self->load_catalog($file);
}
sub _find {
my $self = shift;
my $type = shift;
my $key = shift;
foreach my $dir (@{$self->{'DIRECTIVE'}}) {
my %hash = %{$dir};
return $hash{'FILE'} if $hash{'TYPE'} = $type && $hash{$type} eq $key;
}
return undef;
}
sub system_map {
my($self, $sysid) = @_;
return $self->_find('SYSID', $sysid) || $sysid;
}
sub public_map {
my($self, $pubid) = @_;
$pubid =~ s/\s+/ /g;
return $self->_find('PUBID', $pubid);
}
sub reverse_public_map {
my($self, $filename) = @_;
$filename =~ s/\\/\//g; # canonical path separator
foreach my $dir (@{$self->{'DIRECTIVE'}}) {
my %hash = %{$dir};
my $key = $hash{'TYPE'};
next if $key ne 'PUBID';
# print "$key\n";
# print $hash{$key}, "\n";
# print $hash{'FILE'}, "\n";
# print "\t$filename\n\n";
return $hash{$key} if $hash{'FILE'} eq $filename;
}
return undef;
}
sub declaration {
my($self, $pubid) = @_;
$pubid =~ s/\s+/ /g;
foreach my $dir (@{$self->{'DIRECTIVE'}}) {
my %hash = %{$dir};
return $hash{'FILE'}
if $hash{'TYPE'} eq 'DTDDECL' && $hash{'DTDDECL'} eq $pubid;
return $hash{'FILE'}
if $hash{'TYPE'} eq 'SGMLDECL';
}
return undef;
}
sub load_catalog {
my $self = shift;
my $catalog = shift;
my $drive = "";
my $dir = "";
my @directives = ();
my $count = 0;
local (*F, $_);
print "Reading $catalog...\n" if $self->verbose();
$catalog =~ s/\\/\//g; # canonical path separators
$dir = $1 if $catalog =~ /^(.*)\/[^\/]+$/;
$drive = substr($dir, 0, 2) if substr($dir, 1, 1) eq ':';
if (!open(F, $catalog)) {
print "Failed to open $catalog...\n" if $self->verbose();
return;
}
read (F, $_, -s $catalog);
close (F);
while (/^\s*(\S+)/s) {
my $keyword = uc($1);
$_ = $';
if ($keyword eq 'OVERRIDE') {
$_ =~ /^\s*\S+/s;
$_ = $';
next;
}
if ($keyword eq 'PUBLIC') {
my($pubid, $filename);
if (/^\s*[\"\']/s) {
($pubid, $_) = &parse_quoted_string("CATALOG", $_);
} else {
/^\s*(\S+)/s;
$pubid = $1;
$_ = $';
}
if (/^\s*[\"\']/s) {
($filename, $_) = &parse_quoted_string("CATALOG", $_);
} else {
/^\s*(\S+)/s;
$filename = $1;
$_ = $';
}
if ($filename =~ /^[a-z]:/s) {
# nop
} elsif ($filename =~ /^[\\\/]/) {
$filename = $drive . $filename;
} else {
$filename = $dir . "/" . $filename if $dir ne "";
}
$directives[$count] = {};
$directives[$count]->{'TYPE'} = 'PUBID';
$directives[$count]->{'PUBID'} = $pubid;
$directives[$count]->{'FILE'} = $filename;
$count++;
# print "\"$pubid\" = \"$filename\"\n";
next;
}
if ($keyword eq 'SYSTEM') {
my($sysid, $filename);
if (/^\s*[\"\']/s) {
($sysid, $_) = &parse_quoted_string("CATALOG", $_);
} else {
/^\s*(\S+)/s;
$sysid = $1;
$_ = $';
}
if (/^\s*[\"\']/s) {
($filename, $_) = &parse_quoted_string("CATALOG", $_);
} else {
/^\s*(\S+)/s;
$filename = $1;
$_ = $';
}
if ($filename =~ /^[a-z]:/s) {
# nop
} elsif ($filename =~ /^[\\\/]/) {
$filename = $drive . $filename;
} else {
$filename = $dir . "/" . $filename if $dir ne "";
}
$directives[$count] = {};
$directives[$count]->{'TYPE'} = 'SYSID';
$directives[$count]->{'SYSID'} = $sysid;
$directives[$count]->{'FILE'} = $filename;
$count++;
next;
}
if ($keyword eq 'DTDDECL') {
my($pubid, $filename);
if (/^\s*[\"\']/s) {
($pubid, $_) = &parse_quoted_string("CATALOG", $_);
} else {
/^\s*(\S+)/s;
$pubid = $1;
$_ = $';
}
if (/^\s*[\"\']/s) {
($filename, $_) = &parse_quoted_string("CATALOG", $_);
} else {
/^\s*(\S+)/s;
$filename = $1;
$_ = $';
}
if ($filename =~ /^[a-z]:/s) {
# nop
} elsif ($filename =~ /^[\\\/]/) {
$filename = $drive . $filename;
} else {
$filename = $dir . "/" . $filename if $dir ne "";
}
$directives[$count] = {};
$directives[$count]->{'TYPE'} = 'DTDDECL';
$directives[$count]->{'DTDDECL'} = $pubid;
$directives[$count]->{'FILE'} = $filename;
$count++;
next;
}
if ($keyword eq 'SGMLDECL') {
my($filename);
if (/^\s*[\"\']/s) {
($filename, $_) = &parse_quoted_string("CATALOG", $_);
} else {
/^\s*(\S+)/s;
$filename = $1;
$_ = $';
}
if ($filename =~ /^[a-z]:/s) {
# nop
} elsif ($filename =~ /^[\\\/]/) {
$filename = $drive . $filename;
} else {
$filename = $dir . "/" . $filename if $dir ne "";
}
$directives[$count] = {};
$directives[$count]->{'TYPE'} = 'SGMLDECL';
$directives[$count]->{'SGMLDECL'} = 'SGMLDECL';
$directives[$count]->{'FILE'} = $filename;
$count++;
next;
}
if ($keyword eq 'DOCTYPE') {
my($tag, $filename);
if (/^\s*[\"\']/s) {
($tag, $_) = &parse_quoted_string("CATALOG", $_);
} else {
/^\s*(\S+)/s;
$tag = $1;
$_ = $';
}
if (/^\s*[\"\']/s) {
($filename, $_) = &parse_quoted_string("CATALOG", $_);
} else {
/^\s*(\S+)/s;
$filename = $1;
$_ = $';
}
if ($filename =~ /^[a-z]:/s) {
# nop
} elsif ($filename =~ /^[\\\/]/) {
$filename = $drive . $filename;
} else {
$filename = $dir . "/" . $filename if $dir ne "";
}
# nop...
next;
}
if ($keyword =~ /^\-\-/) {
$_ = $keyword . $_;
/^--.*?--/s;
$_ = $';
next;
}
die "Don't know how to parse CATALOG keyword: $keyword\n";
}
# now populate the real array; making sure that SGMLDECL goes to
# the end of the array
foreach my $dir (@directives) {
my %hash = %{$dir};
next if $hash{'TYPE'} eq 'SGMLDECL';
push(@{$self->{'DIRECTIVE'}}, $dir);
}
foreach my $dir (@directives) {
my %hash = %{$dir};
next if $hash{'TYPE'} ne 'SGMLDECL';
push(@{$self->{'DIRECTIVE'}}, $dir);
}
return 1;
}
sub strip_comment {
my($text) = shift;
while ($text =~ /^\s*--.*?--/s) {
$text = $';
}
return $text;
}
sub parse_quoted_string {
my($decl, $entity) = @_;
my($text);
if ($entity =~ /^\s*\"/s) {
die "Unparseable text: $decl\n" if $entity !~ /^\s*\"(.*?)\"/s;
$text = $1;
$entity = &strip_comment($');
} elsif ($entity =~ /^\s*\'/s) {
die "Unparseable text: $decl\n" if $entity !~ /^\s*\'(.*?)\'/s;
$text = $1;
$entity = &strip_comment($');
} else {
die "Unexpected text: $decl\n";
}
return ($text, $entity);
}
1;