# /usr/local/CPAN/Combine/classifyPlugInTemplate.pm


#Template for writing a classify PlugIn for Combine
#See documentation at http://combine.it.lth.se/documentation/

package classifyPlugInTemplate; #Change to your own module name

use Combine::XWI; #Mandatory
use Combine::Config; #Optional if you want to use the Combine configuration system

#API:
#  a subroutine named 'classify' taking an XWI object as its input parameter
#    return values: 0/1
#        0: the record fails to meet the classification criteria, i.e. ignore this record
#        1: the record is OK and should be stored in the database, and its links followed by the crawler
sub classify {
  # Classification entry point required by the plug-in API.
  #   $self - first argument (presumably the invocant; not used by this template)
  #   $xwi  - the XWI object holding the record to classify
  # Returns 1 if the record should be kept, 0 if it should be ignored.
  my ($self,$xwi) = @_;

  # Verdict returned to the caller. It is declared lexically with an explicit
  # default of 0 (ignore the record) so that the template always returns a
  # defined 0/1 value and does not rely on an undeclared package variable.
  my $result = 0;

  #utility routines to extract information from the XWI-object
  #URL (can be several):
   # $xwi->url_rewind;
   # my $url_str = "";
   # my $t;
   # while ($t = $xwi->url_get) { $url_str .= $t . ", "; }

  #Metadata (name/content pairs; the loop ends when no name is returned):
   #  $xwi->meta_rewind;
   #  my $meta = "";
   #  my ($name,$content);
   #  while (1) {
   #    ($name,$content) = $xwi->meta_get;
   #    last unless $name;
   #    next if ($name eq 'Rsummary');
   #    next if ($name =~ /^autoclass/);
   #    $meta .= $content . " ";
   #  }

  #Title:
   #  my $title = $xwi->title;

  #Headings:
   #  $xwi->heading_rewind;
   #  my $head = "";
   #  my $this;
   #  while (1) {
   #    $this = $xwi->heading_get or last;
   #    $head .= $this . " ";
   #  }

  #Text (the value is dereferenced below, so it is expected to be a scalar reference):
   #  my $text = "";
   #  my $text_ref = $xwi->text;
   #  if ($text_ref) {
   #    $text = $$text_ref;
   #  }

###############################
#Apply your classification algorithm here
#  assign $result a value (0/1)
###############################

  #utility routines for saving detailed results (optional) in the database. These data may appear
  # in exported XML-records

  #Topic takes 5 parameters
  # $xwi->topic_add(topic_class_notation, topic_absolute_score, topic_normalized_score, topic_terms, algorithm_id);
  #  topic_class_notation, topic_terms, and algorithm_id are strings
  #    max length topic_class_notation: 50, algorithm_id: 25
  #  topic_absolute_score and topic_normalized_score are integers
  #  topic_normalized_score and topic_terms are optional and may be replaced with 0 and '' respectively

  #Analysis takes 2 parameters
  # $xwi->robot_add(name,value);
  # both are strings with max length name: 15, value: 20

    # return true (1) if you want to keep the record
    # otherwise return false (0)

  return $result;
}

1;