Text::TEI::Collate::Word - Text::TEI::Collate::Word documentation


Text-TEI-Collate documentation Contained in the Text-TEI-Collate distribution.

Index


Code Index:

DESCRIPTION

Top

Text::TEI::Collate::Word is an object that describes a word in a collated text. This may be a useful way for editors of other things to plug in their own logic.

METHODS

Top

new

Creates a new word object. Should not be called directly.

Access methods

Top

word

If called with an argument, sets the stripped form of the word that should be used for comparison. Returns the word's stripped form.

printable

Return either the word or the 'special', as applicable

original_form

If called with an argument, sets the form of the word, punctuation and all, that was originally passed. Returns the word's original form.

accented_form

If called with an argument, sets the accented form of the word (minus punctuation). Returns the word's accented form.

canonical_form

If called with an argument, sets the canonical form of the word (minus punctuation). Returns the word's canonical form.

comparison_form

If called with an argument, sets the comparison form of the word (using a set standard for orthographic equivalence.) Returns the word's comparison form.

punctuation

If called with an argument, sets the punctuation marks that were passed with the word. Returns the word's puncutation.

canonizer

If called with an argument, sets the canonizer subroutine that the word object should use. Returns the subroutine.

comparator

If called with an argument, sets the comparator subroutine that the word object should use. Returns the subroutine.

special

Returns a word's special value. Used for meta-words like BEGIN and END.

is_empty

Returns whether this is an empty word. Useful to distinguish from a special word.

state

Returns a hash of all the values that might be changed by a re-comparison. Useful to 'back up' a word before attempting a rematch. Currently does not expect any of the 'mutable' keys to contain data structure refs.

is_glommed

Returns true if the word has been matched together with its following word. If passed with an argument, sets this value.

is_base

Returns true if the word has been matched together with its following word. If passed with an argument, sets this value.

placeholders

Returns the sectional markers, if any, that go before the word.

add_placeholder

Adds a sectional marker that should precede the word in question.

ms_sigil

Returns the sigil of the manuscript wherein this word appears.

variants

Returns the list of variants, or an empty list.

add_variant

Adds to the list of 'different' words in this word's column.

BUGS / TODO

Top

Many things. I shall enumerate them later.

AUTHOR

Top

Tara L Andrews <aurum@cpan.org>


Text-TEI-Collate documentation Contained in the Text-TEI-Collate distribution.
package Text::TEI::Collate::Word;

use strict;
use vars qw( $VERSION );

$VERSION = "0.01";

sub new {
    my $proto = shift;
    my %opts = @_;
    my $class = ref( $proto ) || $proto;
    my $init_string;
    if( exists $opts{'string'} ) {
	$init_string = delete $opts{'string'};
    }
    my $self = { 'not_punct' => [],
		 'accents' => [],
		 'canonizer' => undef,
		 'comparator' => undef,
		 %opts,
    };
    
    bless $self, $class;
    if( $self->{'special'} ) {
	$self->{'invisible'} = 1;
    }
    $init_string = '' if( $self->{'empty'} );
    $self->evaluate_word( $init_string );
    return $self;
}

sub evaluate_word {
    my $self = shift;
    my $word = shift;

    unless( defined $word ) {
	$word = '';
	return;
    }

    # Preserve the original word, weird orthography and all.
    if( $self->original_form ) {
	warn "Called evaluate_word on an object that already has a word";
	return undef;
    } else {
	$self->original_form( $word );
    }

    # Canonicalize the word.  Should not yet get rid of any attributes.
    if( defined $self->canonizer ) {
	$word = &{$self->canonizer}( $word );
    }
    $self->canonical_form( $word );

    # Need to ascertain a few characteristics.
    # Has it any punctuation to go with the word, that is not in our
    # list of "not really punctuation"?
    my( $punct, $accent ) = ( [], undef );	    
    my @punct_instances = $word =~ /([[:punct:]])/g;
    foreach my $p ( @punct_instances ) {
	next if( grep /\Q$p\E/, @{$self->{'not_punct'}} );
	push( @$punct, $p );
	$word =~ s/\Q$p\E//g;
    }
    $self->punctuation( $punct );
    # TODO: something sensible with accent marks

    if( defined $self->comparator ) {
	$self->comparison_form( &{$self->comparator}( $word ) );
    } else {
	$self->comparison_form( $word );
    }

    $self->word( $word );
}

# Accessors.

sub word {
    my $self = shift;
    my $form = shift;
    if( defined $form ) {
	$self->{'word'} = $form;
    } 	
    return $self->{'invisible'} ? '' : $self->{'word'}
}

sub printable {
    my $self = shift;
    return $self->special ? $self->special : $self->canonical_form;
}

sub original_form {
    my $self = shift;
    my $form = shift;
    if( defined $form ) {
	$self->{'original_form'} = $form;
    }
    return $self->{'original_form'};
}

sub accented_form {
    my $self = shift;
    my $form = shift;
    if( defined $form ) {
	$self->{'accented_form'} = $form;
    }
    return $self->{'accented_form'};
}

sub canonical_form {
    my $self = shift;
    my $form = shift;
    if( defined $form ) {
	$self->{'canonical_form'} = $form;
    }
    return $self->{'canonical_form'};
}

sub comparison_form {
    my $self = shift;
    my $form = shift;
    if( defined $form ) {
	$self->{'comparison_form'} = $form;
    }
    return $self->{'comparison_form'};
}

sub punctuation {
    my $self = shift;
    my $punct = shift;
    if( $punct ) {
	$self->{'punctuation'} = $punct;
    }
    return @{$self->{'punctuation'}};
}

sub canonizer {
    my $self = shift;
    my $punct = shift;
    if( $punct ) {
	$self->{'canonizer'} = $punct;
    }
    return $self->{'canonizer'};
}

sub comparator {
    my $self = shift;
    my $punct = shift;
    if( $punct ) {
	$self->{'comparator'} = $punct;
    }
    return $self->{'comparator'};
}

sub special {
    my $self = shift;
    return unless exists( $self->{'special'} );
    return $self->{'special'};
}

sub is_empty {
    my $self = shift;
    return $self->{'empty'};
}

my @mutable_keys = qw( glommed );
sub state {
    my $self = shift;
    my $opts = {};
    foreach my $key( @mutable_keys ) {
	warn( "Not making full copy of ref stored in $key" ) 
	    if ref( $self->{$key} );
	$opts->{$key} = $self->{$key};
    }
    return $opts;
}

sub restore_state {
    my $self = shift;
    my $opts = shift;
    return unless ref( $opts ) eq 'HASH';
    foreach my $key( @mutable_keys ) {
	$self->{$key} = $opts->{$key};
    }
}

sub is_glommed {
    my $self = shift;
    my $val = shift;
    if( defined( $val ) ) {
	$self->{'glommed'} = $val;
    }
    return $self->{'glommed'};
}

sub is_base {
    my $self = shift;
    my $val = shift;
    if( defined( $val ) ) {
	$self->{'base'} = $val;
    }
    return $self->{'base'};
}

sub placeholders {
    my $self = shift;
    return exists $self->{'placeholders'} ? @{$self->{'placeholders'}} : ();
}

sub add_placeholder {
    my $self = shift;
    my $new_ph = shift;
    unless( $self->{'placeholders'} ) {
	$self->{'placeholders'} = [];
    }
    push( @{$self->{'placeholders'}}, $new_ph );
}
    

sub ms_sigil {
    my $self = shift;
    return exists $self->{'ms_sigil'} ? $self->{'ms_sigil'} : '';
}

### Links

sub links {
    my $self = shift;
    return exists $self->{'links'} ? @{$self->{'links'}} : ();
}

sub add_link {
    my $self = shift;
    my $new_obj = shift;
    unless( ref( $new_obj ) eq 'Text::TEI::Collate::Word' ) {
	warn "Cannot add a link to a non-word";
	return;
    }
    my $links = exists $self->{'links'} ? $self->{'links'} : [];
    push( @$links, $new_obj );
    $self->{'links'} = $links;
}

sub variants {
    my $self = shift;
    return exists $self->{'variants'} ? @{$self->{'variants'}} : ();
}

sub add_variant {
    my $self = shift;
    my $new_obj = shift;
    unless( ref( $new_obj ) eq 'Text::TEI::Collate::Word' ) {
	warn "Cannot add a non-word as a variant";
	return;
    }
    my $variants = exists $self->{'variants'} ? $self->{'variants'} : [];
    push( @$variants, $new_obj );
    $self->{'variants'} = $variants;
}