ZOOM::IRSpy::Stats - statistics generated for IRSpy about its targets


ZOOM-IRSpy documentation Contained in the ZOOM-IRSpy distribution.

Index


Code Index:

NAME

Top

ZOOM::IRSpy::Stats - statistics generated for IRSpy about its targets

SYNOPSIS

Top

 $stats = new ZOOM::IRSpy::Stats($dbname);
 $stats->print();

DESCRIPTION

Top

Provides a simple API to obtaining statistics about targets registered in IRSpy. This is done just by creating a Stats object. Once this object is made, it can be crudely printed using the built-in debugging print() method, or the application can walk the structure to produce nice output.

METHODS

Top

new()

 $stats = new ZOOM::IRSpy::Stats($dbname, "dc.creator=wedel");
 # Or:
 $stats = new ZOOM::IRSpy::Stats($dbname,
         new ZOOM::Query::PQF('@attr 1=1003 wedel');
 # Or:
 $spy = new ZOOM::Connection("target/string/for/irspy/database"); 
 $stats = new ZOOM::IRSpy::Stats($spy, $query);

Creates a new ZOOM::IRSpy::Stats object and populates it with statistics for the targets in the nominated database. This process involves analysing the nominated IRSpy database at some length, and which therefore takes some time

Either one or two arguments are required:

$conn (mandatory)

An indication of the IRSpy database that statistics are required for. This may be in the form of a ZOOM::Connection object or a database-name string such as localhost:8018/IR-Explain---1.

$query (optional)

The query with which to select a subset of the database to be analysed. This may be in the form of a ZOOM::Query object (using any of the supported subclasses) or a CQL string. If this is omitted, then all records in the database are included in the generated statistics.

print()

SEE ALSO

Top

ZOOM::IRSpy

AUTHOR

Top

Mike Taylor, <mike@indexdata.com>

COPYRIGHT AND LICENSE

Top


ZOOM-IRSpy documentation Contained in the ZOOM-IRSpy distribution.
package ZOOM::IRSpy::Stats;

use 5.008;
use strict;
use warnings;

use Scalar::Util;
use ZOOM::IRSpy::Utils qw(irspy_xpath_context);

sub new {
    my $class = shift();
    my($conn, $query) = @_;

    $query ||= "cql.allRecords=1",
    $conn = new ZOOM::Connection($conn) if !ref $conn;
    $query = new ZOOM::Query::CQL($query) if !ref $query;

    my $oldSyntax = $conn->option("preferredRecordSyntax");
    my $oldESN = $conn->option("elementSetName");
    my $oldPC = $conn->option("presentChunk");
    $conn->option(preferredRecordSyntax => "xml");
    $conn->option(elementSetName => "zeerex");
#    $conn->option(presentChunk => 10);

    my $rs = $conn->search($query);
    my $n = $rs->size();

    my $this = bless {
	host => $conn->option("host"),
	conn => $conn,
	query => $query,
	rs => $rs,
	n => $n,
    }, $class;

    $this->_gather_stats();
    $conn->option(preferredRecordSyntax => $oldSyntax);
    $conn->option(elementSetName => $oldESN);
    $conn->option(presentChunk => $oldPC);

    return $this;
}


sub _gather_stats {
    my $this = shift();

    foreach my $i (0 .. $this->{n}-1) {
	my $rec = $this->{rs}->record($i);
	my $xc = irspy_xpath_context($rec);

	# The ten most commonly supported Bib-1 Use attributes
	foreach my $node ($xc->findnodes('e:indexInfo/e:index[@search="true"]/e:map/e:attr[@type=1 and @set="bib-1"]')) {
	    $this->{bib1AccessPoints}->{$node->findvalue(".")}++;
	}

	# Record syntax support by database
	foreach my $node ($xc->findnodes('e:recordInfo/e:recordSyntax/@name')) {
	    $this->{recordSyntaxes}->{lc($node->findvalue("."))}++;
	}

	# Explain support
	foreach my $node ($xc->findnodes('i:status/i:explain[@ok="1"]/@category')) {
	    $this->{explain}->{$node->findvalue(".")}++;
	}

	# Z39.50 Protocol Services Support
	foreach my $node ($xc->findnodes('e:configInfo/e:supports')) {
	    my $supports = $node->findvalue('@type');
	    if ($node->findvalue(".") && $supports =~ s/^z3950_//) {
		$this->{z3950_init_opt}->{$supports}++;
	    }
	}

	# Z39.50 Server Atlas
	### TODO -- awkward, should be considered an enhancement

	# Top Domains
	my $host = $xc->findvalue('e:serverInfo/e:host');
	$host =~ s/.*\.//;
	$this->{domains}->{lc($host)}++;

	# Implementation
	foreach my $node ($xc->findnodes('i:status/i:serverImplementationName/@value')) {
	    $this->{implementation}->{$node->findvalue(".")}++;
	    last; # This is because many of the records are still
	          # polluted with multiple implementationName elements
	          # from back then XSLT stylesheet that generated
	          # ZeeRex records was wrong.
	}
    }
}


sub print {
    my $this = shift();

    print "database = '", $this->{conn}->option("host"), "'\n";
    print "query = '", $this->{query}, "'\n";
    print "result set = '", $this->{rs}, "'\n";
    print "count = '", $this->{n}, "'\n";
    my $hr;

    print "\nTOP 10 BIB-1 ATTRIBUTES\n";
    $hr = $this->{bib1AccessPoints};
    foreach my $key ((sort { $hr->{$b} <=> $hr->{$a} 
			     || $a <=> $b } keys %$hr)[0..9]) {
	print sprintf("%6d%20s%5d (%d%%)\n",
		      $key, "", $hr->{$key}, 100*$hr->{$key}/$this->{n});
    }

    print "\nRECORD SYNTAXES\n";
    $hr = $this->{recordSyntaxes};
    foreach my $key (sort { $hr->{$b} <=> $hr->{$a} 
			    || $a cmp $b } keys %$hr) {
	print sprintf("%-26s%5d (%d%%)\n",
		      $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
    }

    print "\nEXPLAIN SUPPORT\n";
    $hr = $this->{explain};
    foreach my $key (sort { $hr->{$b} <=> $hr->{$a} 
			    || $a cmp $b } keys %$hr) {
	print sprintf("%-26s%5d (%d%%)\n",
		      $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
    }

    print "\nZ39.50 PROTOCOL SERVICES SUPPORT\n";
    $hr = $this->{z3950_init_opt};
    foreach my $key (sort { $hr->{$b} <=> $hr->{$a} 
			    || $a cmp $b } keys %$hr) {
	print sprintf("%-26s%5d (%d%%)\n",
		      $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
    }

    print "\nTOP-LEVEL DOMAINS\n";
    $hr = $this->{domains};
    foreach my $key (sort { $hr->{$b} <=> $hr->{$a} 
			    || $a cmp $b } keys %$hr) {
	print sprintf("%-26s%5d (%d%%)\n",
		      $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
    }

    print "\nIMPLEMENTATIONS\n";
    $hr = $this->{implementation};
    foreach my $key (sort { $hr->{$b} <=> $hr->{$a} 
			    || $a cmp $b } keys %$hr) {
	print sprintf("%-26s%5d (%d%%)\n",
		      $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
    }
}


1;