Parse::Tokens - class for parsing text with embedded tokens


Parse-Tokens documentation Contained in the Parse-Tokens distribution.

Index


Code Index:

NAME

Top

Parse::Tokens - class for parsing text with embedded tokens

SYNOPSIS

Top

  package MyParser;
  use base 'Parse::Tokens';

  MyParser->new->parse({
      text => q{Hi my name is <? $name ?>.},
      hash => {name=>'John Doe'},
      delimiters => [['<?','?>']],
  });

  # override SUPER::token
  sub token
  {
      my( $self, $token ) = @_;
      # $token->[0] - left bracket
      # $token->[1] - contents
      # $token->[2] - right bracket
      # do something with the token...
  }

  # override SUPER::token
  sub ether
  {
      my( $self, $text ) = @_;
      # do something with the text...
  }




DESCRIPTION Parse::Tokens provides a base class for parsing delimited strings from text blocks. Use Parse::Tokens as a base class for your own module or script. Very similar in style to HTML::Parser.

Top

METHODS

Top

new()
  Pass parameter as a hash reference.
  Options are specified in the getter/setter methods.




flush()
  Flush the template cash.




parse()
  Run the parser.




SETTER/GETTER METHODS

Top

autoflush()
  Turn on autoflushing causing the template cash (not the text) to be purged before each call to parse();.




delimiters()
  Specify delimiters as an array reference pointing to the left and right delimiters. Returns array reference containing two array references of delimiters and escaped delimiters.




debug()
  Turn on debug mode. 1 is on, 0 is off.




ether_callback()
  Sets/gets the callback code reference for the 'ether' event.




loose_paring()
  Allow any combination of delimiters to match. Default is turned of requiring exactly specified pair matches only.




post_callback()
  Sets/gets the callback code reference for the 'post_parse' event.




pre_callback()
  Sets/gets the callback code reference for the 'pre_parse' event.




push_delimiters()
  Add a delimiter pair (array ref) to the list of delimiters.




text()
  Load text.




token_callback()
  Sets/gets the callback code reference for the 'token' event.

EVENT METHODS

Top

ether()
  Event method that gets called when non-token text is encountered during parsing.




post_parse()
  Event method that gets called after parsing has completed.




pre_parse()
  Event method that gets called prior to parsing commencing.




token()
  Event method that gets called when a token is encountered during parsing.

HISTORY

Top

  Cleanup of internal documentation.

  Added support for callbacks.
  Improved debug messaging.
  Fixed bug in delimiter assignment.
  Rearranged distribution files.

  Added sample script and sample data.

  Fixed pseudo bug relation to regular expression 'o' option.
  Aliased 'add_delimiters' to 'push_delimiters'.
  Misc internal changes.

  Add push_delimiters method for adding to the delimiter array.

  Add pre_parse and post_parse methods; add minimal debug message support.

  Add multi-token support.

AUTHOR

Top

Steve McKay, steve@colgreen.com

COPYRIGHT

Top


Parse-Tokens documentation Contained in the Parse-Tokens distribution.

package Parse::Tokens;

# $Id: Tokens.pm,v 1.5 2001/11/28 01:14:55 steve Exp $

# Copyright 2000-2001 by Steve McKay. All rights reserved.
# This library is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.

use strict;
use vars qw( $VERSION  );

$VERSION = 0.27;

sub new
{
	my ( $proto, $params ) = @_;
	my $class = ref($proto) || $proto;
	my $self = {
		debug => undef,
		text => undef,
		autoflush => undef,
		loose_paring => undef,
		pre_callback => undef,
		post_callback => undef,
		token_callback => undef,
		ether_callback => undef,
		delimiters => [],
		delim_index => {},
	};
	bless( $self, $class );
	$self->init( $params );
	$self;
}

sub init
{
    my( $self, @args ) = @_;
	no strict 'refs';
	$self->_msg( "Processing initialization arguments." );
	for ( keys %{$args[0]} )
	{
		my $ref = lc $_;
		$self->$ref( $args[0]->{$_} );
	}
	use strict;
}

sub debug
{
	my( $self, @args ) = @_;
	$self->_msg( "Storing 'debug' prefs." );
	$self->{'debug'} = $args[0] if defined $args[0];
	return $self->{'debug'};
}

sub token_callback
{
	my( $self, @args ) = @_;
	$self->_msg( "Storing 'token_callback' prefs." );
	$self->{'token_callback'} = $args[0] if defined $args[0];
	return $self->{'token_callback'};
}

sub ether_callback
{
	my( $self, @args ) = @_;
	$self->_msg( "Storing 'ether_callback' prefs." );
	$self->{'ether_callback'} = $args[0] if defined $args[0];
	return $self->{'ether_callback'};
}

sub pre_callback
{
	my( $self, @args ) = @_;
	$self->_msg( "Storing 'pre_callback' prefs." );
	$self->{'pre_callback'} = $args[0] if defined $args[0];
	return $self->{'pre_callback'};
}

sub post_callback
{
	my( $self, @args ) = @_;
	$self->_msg( "Storing 'post_callback' prefs." );
	$self->{'post_callback'} = $args[0] if defined $args[0];
	return $self->{'post_callback'};
}

sub loose_paring
{
	my( $self, @args ) = @_;
	$self->_msg( "Storing 'loose_paring' prefs." );
	$self->{'loose_paring'} = $args[0] if defined $args[0];
	return $self->{'loose_paring'};
}

sub autoflush
{
	my( $self, @args ) = @_;
	$self->_msg( "Storing 'autoflush' prefs." );
	$self->{'autoflush'} = $args[0] if defined $args[0];
	return $self->{'autoflush'};
}

sub text
{
	my( $self, @args ) = @_;
	$self->_msg( "Storing 'text'." );
	$self->flush();
	$self->{'text'} = $args[0] if defined $args[0];
	return $self->{'text'};
}

sub delimiters
{
	my( $self, @args ) = @_;
	# we currently support both a ref to an array of delims
	# as well as an ref to an array of array refs with delims
	if ( ref($args[0]) eq 'ARRAY' )
	{
		# wipe our existing delimiters
		$self->{'delimiters'} = [];
		# we have multiple arrays
		if( ref($args[0]->[0]) eq 'ARRAY' )
		{
			for( @{$args[0]} )
			{
				$self->push_delimiters( $_ );
			}	
		}
		# we have only this array ref
		else
		{
			$self->push_delimiters( $args[0] );
		}
	}
	return @{$self->{'delimiters'}};
}

*add_delimiters = \&push_delimiters;
sub push_delimiters
{ 
	# add a delim pair (real and quoted) to the delimiters array
	my( $self, @args ) = @_;
	$self->_msg( "Adding delimiter pair." );
	if( ref($args[0]) eq 'ARRAY' )
	{
		push(
			@{$self->{'delimiters'}}, {
				real	=> $args[0],
				quoted	=> [
					quotemeta($args[0]->[0]),
					quotemeta($args[0]->[1])
				]
			}
		);
		$self->{'delim_index'}->{$args[0]->[0]} = $#{$self->{delimiters}};
		$self->{'delim_index'}->{$args[0]->[1]} = $#{$self->{delimiters}};
	}
	else
	{
		warn "Args to push_delimiter not an array reference";
	}
	return 1;
}

sub flush
{
	my( $self ) = @_;
	$self->_msg( "Flushing cached parts." );
	delete $self->{'cache'};
	return 1;
}

sub parse
{
	my( $self, @args ) = @_;
	$self->pre_parse();
	$self->init( $args[0] );
	return unless defined $self->{'text'};
	$self->flush() if $self->{'autoflush'};

	my @delim = $self->delimiters();
	my $match_rex = $self->match_expression( \@delim );

	unless( $self->{'cache'} )
	{
		# parse the text
		$self->_msg( "Data not cached. Parsing text." );
		my @chunk = split( m/$match_rex/s, $self->{'text'} );
		@{$self->{'cache'}} = @chunk;
	}

	$self->_msg( "Processing parsed text parts." );
	my $n = 0;
	while ($n <= $#{$self->{'cache'}})
	{
		# find opening delimiter
		
		# if the first element of the token is the element of a token
		#if ( $self->{cache}->[$n] eq $delim[0]->{real}->[0] || $self->{cache}->[$n] eq $delim[1]->{real}->[0] )
		if ( $self->{'cache'}->[$n] eq $delim[$self->{'delim_index'}->{$self->{'cache'}->[$n]}]->{'real'}->[0] )
		{
			$self->_msg( "Dispatching token." );
			$self->token([
				$self->{'cache'}->[$n],
				$self->{'cache'}->[++$n],
				$self->{'cache'}->[++$n]
			]);
		}

		# or it's just text
		else
		{
			$self->_msg( "Dispatching text." );
			$self->ether( $self->{'cache'}->[$n] );
		}
		$n++
	}
	$self->post_parse();
}

sub match_expression
{
	# construct our token finding regular expression
	my( $self, $delim ) = @_;
	my $rex;
	if( $self->{'loose_paring'} )
	{
		my( @left, @right );
		for( @$delim )
		{
			push( @left, $_->{'quoted'}->[0] );
			push( @right, $_->{'quoted'}->[1] );
		}
		$rex = '('.join('|', @left).')(.*?)('.join('|', @right).')';
	}
	else
	{
		my( @sets );
		for( @$delim )
		{
			push( @sets, qq{($_->{'quoted'}->[0])(.*?)($_->{'quoted'}->[1])} );
		}
		$rex = join( '|', @sets );
	}
	$self->_msg( "Constructed '$rex' pattern matching expression." );
	$self->{'match_expression'} = $rex;
	return $rex;
}

# a token consists of a left-delimiter, the contents, and a right-delimiter
*atom = \&token;
sub token
{
	my( $self, $token ) = @_;
	$self->_msg( "Found token ", join( ', ', @$token ) );
	if( $self->{'token_callback'} )
	{
		$self->_msg( "Dispatching token to callback handler '$self->{'token_callback'}'." );
		no strict 'refs';
		&{$self->{'token_callback'}}( $token );
		use strict;
	}
	else
	{
		$self->_msg( "Consider overriding my 'token' method." );
	}
	return 1;
}

# ether is anything not contained in an atom
sub ether
{
	my( $self, $text ) = @_;
	$self->_msg( "Found text ", $text );
	if( $self->{'ether_callback'} )
	{
		$self->_msg( "Dispatching text to callback handler '$self->{'ether_callback'}'." );
		no strict 'refs';
		&{$self->{'ether_callback'}}( $text );
		use strict;
	}
	else {
		$self->_msg( "Consider overriding my 'ether' method." );
	}
	return 1;
}

# this is called just before parsing begins
sub pre_parse
{
	my( $self ) = @_;
	if( $self->{'pre_callback'} )
	{
		$self->_msg( "Dispatching pre_parse event to callback handler '$self->{'pre_callback'}'." );
		no strict 'refs';
		&{$self->{'pre_callback'}}();
		use strict;
	}
	else
	{
		$self->_msg( "Consider overriding my 'pre_parse' method." );
	}
	return 1;
}


# this is called just after parsing ends
sub post_parse
{
	my( $self ) = @_;
	if( $self->{'post_callback'} )
	{
		$self->_msg( "Dispatching post_parse event to callback handler '$self->{'post_callback'}'." );
		no strict 'refs';
		&{$self->{'post_callback'}}();
		use strict;
	}
	else
	{
		$self->_msg( "Consider overriding my 'post_parse' method." );
	}
	return 1;
}

sub _msg
{
	my( $self, @msg ) = @_;
	if( $self->{'debug'} )
	{
		warn __PACKAGE__, ' - ', @msg;
	}
	return 1;
}

1;

__END__