| Text-TEI-Collate documentation | Contained in the Text-TEI-Collate distribution. |
Text::TEI::Collate::Word is an object that describes a word in a collated text. This may be a useful way for editors of other things to plug in their own logic.
Creates a new word object. Should not be called directly.
If called with an argument, sets the stripped form of the word that should be used for comparison. Returns the word's stripped form.
Return either the word or the 'special', as applicable
If called with an argument, sets the form of the word, punctuation and all, that was originally passed. Returns the word's original form.
If called with an argument, sets the accented form of the word (minus punctuation). Returns the word's accented form.
If called with an argument, sets the canonical form of the word (minus punctuation). Returns the word's canonical form.
If called with an argument, sets the comparison form of the word (using a set standard for orthographic equivalence.) Returns the word's comparison form.
If called with an argument, sets the punctuation marks that were passed with the word. Returns the word's puncutation.
If called with an argument, sets the canonizer subroutine that the word object should use. Returns the subroutine.
If called with an argument, sets the comparator subroutine that the word object should use. Returns the subroutine.
Returns a word's special value. Used for meta-words like BEGIN and END.
Returns whether this is an empty word. Useful to distinguish from a special word.
Returns a hash of all the values that might be changed by a re-comparison. Useful to 'back up' a word before attempting a rematch. Currently does not expect any of the 'mutable' keys to contain data structure refs.
Returns true if the word has been matched together with its following word. If passed with an argument, sets this value.
Returns true if the word has been matched together with its following word. If passed with an argument, sets this value.
Returns the sectional markers, if any, that go before the word.
Adds a sectional marker that should precede the word in question.
Returns the sigil of the manuscript wherein this word appears.
Returns the list of links, or an empty list.
Adds to the list of 'like' words in this word's column.
Returns the list of variants, or an empty list.
Adds to the list of 'different' words in this word's column.
Many things. I shall enumerate them later.
Tara L Andrews <aurum@cpan.org>
| Text-TEI-Collate documentation | Contained in the Text-TEI-Collate distribution. |
package Text::TEI::Collate::Word; use strict; use vars qw( $VERSION ); $VERSION = "0.01";
sub new { my $proto = shift; my %opts = @_; my $class = ref( $proto ) || $proto; my $init_string; if( exists $opts{'string'} ) { $init_string = delete $opts{'string'}; } my $self = { 'not_punct' => [], 'accents' => [], 'canonizer' => undef, 'comparator' => undef, %opts, }; bless $self, $class; if( $self->{'special'} ) { $self->{'invisible'} = 1; } $init_string = '' if( $self->{'empty'} ); $self->evaluate_word( $init_string ); return $self; } sub evaluate_word { my $self = shift; my $word = shift; unless( defined $word ) { $word = ''; return; } # Preserve the original word, weird orthography and all. if( $self->original_form ) { warn "Called evaluate_word on an object that already has a word"; return undef; } else { $self->original_form( $word ); } # Canonicalize the word. Should not yet get rid of any attributes. if( defined $self->canonizer ) { $word = &{$self->canonizer}( $word ); } $self->canonical_form( $word ); # Need to ascertain a few characteristics. # Has it any punctuation to go with the word, that is not in our # list of "not really punctuation"? my( $punct, $accent ) = ( [], undef ); my @punct_instances = $word =~ /([[:punct:]])/g; foreach my $p ( @punct_instances ) { next if( grep /\Q$p\E/, @{$self->{'not_punct'}} ); push( @$punct, $p ); $word =~ s/\Q$p\E//g; } $self->punctuation( $punct ); # TODO: something sensible with accent marks if( defined $self->comparator ) { $self->comparison_form( &{$self->comparator}( $word ) ); } else { $self->comparison_form( $word ); } $self->word( $word ); } # Accessors.
sub word { my $self = shift; my $form = shift; if( defined $form ) { $self->{'word'} = $form; } return $self->{'invisible'} ? '' : $self->{'word'} }
sub printable { my $self = shift; return $self->special ? $self->special : $self->canonical_form; }
sub original_form { my $self = shift; my $form = shift; if( defined $form ) { $self->{'original_form'} = $form; } return $self->{'original_form'}; }
sub accented_form { my $self = shift; my $form = shift; if( defined $form ) { $self->{'accented_form'} = $form; } return $self->{'accented_form'}; }
sub canonical_form { my $self = shift; my $form = shift; if( defined $form ) { $self->{'canonical_form'} = $form; } return $self->{'canonical_form'}; }
sub comparison_form { my $self = shift; my $form = shift; if( defined $form ) { $self->{'comparison_form'} = $form; } return $self->{'comparison_form'}; }
sub punctuation { my $self = shift; my $punct = shift; if( $punct ) { $self->{'punctuation'} = $punct; } return @{$self->{'punctuation'}}; }
sub canonizer { my $self = shift; my $punct = shift; if( $punct ) { $self->{'canonizer'} = $punct; } return $self->{'canonizer'}; }
sub comparator { my $self = shift; my $punct = shift; if( $punct ) { $self->{'comparator'} = $punct; } return $self->{'comparator'}; }
sub special { my $self = shift; return unless exists( $self->{'special'} ); return $self->{'special'}; }
sub is_empty { my $self = shift; return $self->{'empty'}; }
my @mutable_keys = qw( glommed ); sub state { my $self = shift; my $opts = {}; foreach my $key( @mutable_keys ) { warn( "Not making full copy of ref stored in $key" ) if ref( $self->{$key} ); $opts->{$key} = $self->{$key}; } return $opts; } sub restore_state { my $self = shift; my $opts = shift; return unless ref( $opts ) eq 'HASH'; foreach my $key( @mutable_keys ) { $self->{$key} = $opts->{$key}; } }
sub is_glommed { my $self = shift; my $val = shift; if( defined( $val ) ) { $self->{'glommed'} = $val; } return $self->{'glommed'}; }
sub is_base { my $self = shift; my $val = shift; if( defined( $val ) ) { $self->{'base'} = $val; } return $self->{'base'}; }
sub placeholders { my $self = shift; return exists $self->{'placeholders'} ? @{$self->{'placeholders'}} : (); }
sub add_placeholder { my $self = shift; my $new_ph = shift; unless( $self->{'placeholders'} ) { $self->{'placeholders'} = []; } push( @{$self->{'placeholders'}}, $new_ph ); }
sub ms_sigil { my $self = shift; return exists $self->{'ms_sigil'} ? $self->{'ms_sigil'} : ''; } ### Links
sub links { my $self = shift; return exists $self->{'links'} ? @{$self->{'links'}} : (); }
sub add_link { my $self = shift; my $new_obj = shift; unless( ref( $new_obj ) eq 'Text::TEI::Collate::Word' ) { warn "Cannot add a link to a non-word"; return; } my $links = exists $self->{'links'} ? $self->{'links'} : []; push( @$links, $new_obj ); $self->{'links'} = $links; }
sub variants { my $self = shift; return exists $self->{'variants'} ? @{$self->{'variants'}} : (); }
sub add_variant { my $self = shift; my $new_obj = shift; unless( ref( $new_obj ) eq 'Text::TEI::Collate::Word' ) { warn "Cannot add a non-word as a variant"; return; } my $variants = exists $self->{'variants'} ? $self->{'variants'} : []; push( @$variants, $new_obj ); $self->{'variants'} = $variants; }