Text::MetaText::Factory - Factory class for instatiating Directive objects.


Text-MetaText documentation Contained in the Text-MetaText distribution.

Index


Code Index:

NAME

Top

Text::MetaText::Factory - Factory class for instatiating Directive objects.

SYNOPSIS

Top

    use Text::MetaText::Factory;
    my $factory = Text::MetaText::Factory->new(\%cfg);

DESCRIPTION

Top

The Text::MetaText::Factory module is used by Text::MetaText to instantiate Text::MetaText::Directive objects. The Factory and Directive classes can be sub-classed to create a more specific processing system.

AUTHOR

Top

Andy Wardley <abw@kfs.org>

See also:

    http://www.kfs.org/~abw/

REVISION

Top

$Revision: 0.2 $

COPYRIGHT

Top

SEE ALSO

Top

For more information, see the main Text::MetaText documentation:

    perldoc Text::MetaText

For more information about the author and other Perl development work:

    http://www.kfs.org/~abw/
    http://www.kfs.org/~abw/perl/
    http://www.cre.canon.co.uk/perl/

For more information about Perl in general:

    http://www.perl.com/


Text-MetaText documentation Contained in the Text-MetaText distribution.

#============================================================================
#
# Text::MetaText::Factory
#
# DESCRIPTION
#   Objects of the the MetaText Factory class (Text::MetaText::Factory) 
#   are used to instantiate objects of the MetaText Directive class 
#   (Text::MetaText::Directive) or sub-classes.  The default factory
#   is responsible for parsing the contents of a directive string and 
#   creating from that a specifically configured Directive object.  The
#   MetaText object class (Text::MetaText) uses a factory instance (which
#   may be user-supplied at run-time) in constructing a parsed ("compiled")
#   representation of a document.  The Factory and Directive classes can
#   easily be sub-classed to derive more specific objects that can then be 
#   used in the standard MetaText framework.
#
# AUTHOR
#   Andy Wardley   <abw@kfs.org>
#
# COPYRIGHT
#   Copyright (C) 1996-1998 Andy Wardley.  All Rights Reserved.
#
#   This module is free software; you can redistribute it and/or
#   modify it under the terms of the Perl Artistic Licence.
#
#----------------------------------------------------------------------------
#
# $Id: Factory.pm,v 0.2 1998/09/01 12:59:45 abw Exp abw $
#
#============================================================================
 
package Text::MetaText::Factory;

use strict;
use vars qw( $VERSION $DIRECTIVE $ERROR $CONTROL );

use Text::MetaText::Directive;

require 5.004;



#========================================================================
#                      -----  CONFIGURATION  -----
#========================================================================
 
$VERSION    = sprintf("%d.%02d", q$Revision: 0.2 $ =~ /(\d+)\.(\d+)/);
$DIRECTIVE  = 'Text::MetaText::Directive';  # default directive type

# define the control parameters valid for each directive type
$CONTROL    = {
    DEFINE  => { map { $_ => 1 } qw( IF UNLESS ) },
    SUBST   => { map { $_ => 1 } qw( IF UNLESS FORMAT FILTER ) },
    INCLUDE => { map { $_ => 1 } qw( IF UNLESS FORMAT FILTER ) },
    BLOCK   => { map { $_ => 1 } qw( PRINT TRIM ) },
};



#========================================================================
#                      -----  PUBLIC METHODS -----
#========================================================================
 
#========================================================================
#
# new(\%cfg)
#
# Object constructor.  A reference to a hash array of configuration 
# options may be passed which is then delegated to _configure() to 
# process.
#
# Returns a reference to a newly created Text::MetaText::Factory object.
#
#========================================================================

sub new {
    my $self   = shift;
    my $class  = ref($self) || $self;
    my $cfg    = shift;


    # make me an object
    $self = bless { }, $class;

    # the configuration hash, $cfg, may contain an entry $cfg->{ DIRECTIVE }
    # which names the directive class which the factory is expected to 
    # instantiate.  If this is undefined, the $DIRECTIVE variable in the 
    # class package (which may be a sub-class) and in the current (base-
    # class) package are checked, in that order, and used if defined.
    {
	# turn off strict reference checking for this block so that we can 
	# construct a variable name in the calling package without warning
	no strict 'refs';

	$self->{ DIRECTIVE } =            # specified in...
	       $cfg->{ DIRECTIVE }        # ...the $cfg hashref
	    || ${ "$class\::DIRECTIVE" }  # ...the calling package, $class
	    || $DIRECTIVE;                # ...the current (base) package
    }

    # we call _configure() which in this base class does very little but 
    # acts as a convenient hook for sub-classes.   The return value of 
    # _configure() indicates if the constructor should return a $self 
    # reference to indicate success (any true value) or undef to indicate 
    # failure (any false value)
    $self->_configure($cfg)
	? $self
	: undef;
}



#========================================================================
#
# create_directive($text)
#
# The public method create_directive() is called by Text::MetaText when
# is has identified a text string enclosed in the MetaText magic marker
# tokens (default: '%%' ... '%%') which it needs converting to a 
# Directive object.  The text string to be converted is passed in the 
# only parameter, $text.
#
# Returns a reference to a newly created Text::MetaText::Directive 
# object, or derivative.  On error, undef is returned and an appropriate 
# error message will be stored internally, available through the public 
# error() method.
#
#========================================================================

sub create_directive {
    my $self = shift;
    my $text = shift;

    my $directive = { };
    my ($type, $ident);
    my ($tokens, $token);
    my ($name, $value);
    my ($ucname, $uctype);


    # save the original parameter text string
    $directive->{ PARAMSTR } = $text;

    # split the text string into lexical tokens
    $tokens = $self->_split_text($text);

    # identify the type (first token) in the parameter string
    unless (defined($type = shift @$tokens) && ! ref($type)) {
	$self->_error("Missing directive keyword");
	return undef;
    }

    # keep an UPPER CASE $type to avoid using case insensitive regexen
    $uctype = uc $type;


    # parse the directive parameters according to the directive type
    TYPE: {
	
	# END(BLOCK|IF)? directive ignores everything
	$uctype =~ /^END(BLOCK|IF)?$/o && do {
	    $ident = '';
	    last TYPE;
	};

	# DEFINE directive has optional identifier and params
	$uctype =~ /^DEFINE$/o && do {

	    # identifier must be a simple variable
	    $ident = (@$tokens && !ref($tokens->[0]))
		? shift(@$tokens)
		: '';
	    last TYPE;
	};
		
	# INCLUDE/SUBST/BLOCK have mandatory identifier and 
	# optional params
	$uctype =~ /^(INCLUDE|SUBST|BLOCK)$/o && do {

	    # check there is a simple text identifier 
	    unless (@$tokens && !ref($tokens->[0])) {
		$self->_error("No identifier in $type directive");
		return undef;
	    };
	    $ident = shift(@$tokens);
	    last TYPE;
	};

	# if the type isn't recognised, we assume it's a basic SUBST
	$ident = $type;
	$type  = $uctype = 'SUBST';
    }

    # save identifier (as is) and keyword (in upper case)
    $directive->{ TYPE }       = $uctype;
    $directive->{ IDENTIFIER } = $ident;

    # initialise parameter hash
    $directive->{ PARAMS }     = {};

    # examine, process and store the additional directive parameters
    foreach $token (@$tokens) {
    
	# extract/create a name, value pair from token (array or scalar)
	($name, $value) = ref($token) eq 'ARRAY'
	    ? @$token
	    : ($token, 0);

	# un-escape any escaped characters in the value
	$value =~ s/\\(.)/$1/go;

	# keep an UPPER CASE copy of the name
	$ucname = uc $name;

	# is this a "control" parameter?
	if (defined $CONTROL->{ $uctype }->{ $ucname }) {
	    # control params are forced to upper case
	    $directive->{ $ucname } = $value;
	}
	# otherwise, it's a normal variable parameter
	else {
	    $directive->{ PARAMS }->{ $name } = $value;
	} 
    }


    # create a new Directive and check everything worked OK
    unless (defined($directive = $self->{ DIRECTIVE }->new($directive))) {
	# we need to construct a soft reference to the error function in 
	# the Directive base class
	no strict 'refs';

	$self->_error("Directive constructor failed: %s",
	    &{ $self->{ DIRECTIVE } . "\::error" } || '<unreported error>');
    }

    # return undef or reference to newly constructed directive
    $directive;
}



#========================================================================
#
# directive_type()
#
# Public method used by calling objects to determine the class type of 
# the directives that the Factory creates via the create_directive() 
# method.
#
# Returns a string containing the class name.
# 
#========================================================================

sub directive_type {
    my $self = shift;

    $self->{ DIRECTIVE };
}



#========================================================================
#
# error()
#
# Returns the current object error message, stored internally in 
# $self->{ ERROR } or undef if no error condition is recorded.  If the
# first (implicit) parameter isn't an object reference, then this must
# have been called as a package function rather than an object method.
# In this case, the contents of the package variable, $ERROR, is 
# returned.  e.g.
#
#     $factory->error();                   # returns $self->{ ERROR }
#     Text::MetaText::Factory::error();    # returns $ERROR
#
# Returns an error string or undef if no error condition is currently
# raised.
#
#========================================================================

sub error {
    my $self = shift;

    defined $self 
	? $self->{ ERROR }
	: $ERROR;
}



#========================================================================
#                     -----  PRIVATE METHODS -----
#========================================================================
 
#========================================================================
#
# _configure(\%cfg)
#
# Private initialisation method called by the new() constructor.  
# This acts as a hook method for derived classes who may wish to do 
# specific initialisation.  Errors can be reported in the _configure()
# method by calling $self->_error(...)
#
# Returns 1 on success, undef on failure.  Derived methods must follow
# this protocol if they utilise the base class constructor, new(), and 
# return a true/undef value to indicate if the method was successful or 
# not.  This affects whether or not the constructor returns a new object 
# or undef to indicate failure.
#
#========================================================================

sub _configure {
    my $self = shift;
    my $cfg  = shift || { };


    # do nothing - just return success
    1;
}



#========================================================================
#
# _split_text($text)
#
# Utility routine to split the input text, $text, into lexical tokens.
# The tokens are identified as single words which are pushed directly 
# onto a "@tokens" list, or "<variable> = <value>" pairs which are 
# coerced into a two-element array ([0] => variable, [1] => value) which 
# is then stored in the list by reference.
#
# A reference to the list of tokens is returned.  On error, undef is 
# returned and the internal ERROR string will be set.
#
#========================================================================

sub _split_text {
    my $self   = shift;
    my $text   = shift;
    my @tokens = ();


    # some simple definitions of elements we use in the regex
    my $word     = q((\S+));         # a word
    my $space    = q(\s*);           # optional space
    my $quote    = q(");             # single or double quote characters
    my $escape   = "\\\\";           # an escape \\ (both '\' escaped)
    my $anyquote = "[$quote]";       # class for quote chars
    my $equals   = "$space=$space";  # '=', with optional whitespace

    # within a quoted phrase we might find escaped quotes, e.g. 
    # name = "James \"Charlie\" Brown";  to detect this, we scan
    # for sequences of legal characters (not quotes or escapes) up until
    # the first quote or escape;  if we find an escape, we jump past the
    # next character (possible a quote) and repeat the process, and repeat
    # the process, and so on until we *don't* find an escape as the next 
    # character;  that implies it's an unescaped quote and the string ends.
    # (don't worry if that slipped you by - just think of it as magic)

    my $okchars = "[^$quote$escape]*";
    my $qphrase = "$anyquote ( $okchars ($escape.$okchars)* ) $anyquote";


    # split directive parameters; note that our definitions from 
    # above have embedded substrings ( ) so we need to be a little 
    # careful about counting backreferences accurately...
    while ($text =~ 
	    /
				$word $equals $qphrase    # $1 = $2    (NB: $2 contains $3)
				|                         # e.g. (foo) = "(bar baz)"
				$word $equals $word       # $4 = $5    
				|                         # e.g. (foo) = (bar)
				$qphrase                  # $6         (NB: $6 contains $7)
				|                         # e.g. "(foo bar)"
				$word                     # $8
					  					  # e.g. (foo)
	    	    /gxo) { # 'o' - compile regex once only

	if ($6 or $8) {
	    # if $6 or $8 is defined, we found a simple flag.  This gets
	    # pushed directly onto the tokens list
	    push(@tokens, defined($6) ? $6 : $8);
	} 
	else {
	    # $6 and $8 undefined so use $1 = $2, or $4 = $5.  This 
	    # "name = value" pair get pushed onto the token list as
	    # an array reference
	    push(@tokens, [ 
	    	    defined($1) ? $1 : $4, 
	    	    defined($1) ? $2 : $5 
		]);
	}
    }

    # return a reference to the tokens list
    \@tokens;
}



#========================================================================
#
# sub _error($errmsg, @params) 
#
# Formats the error message format, $errmsg, and any additional parameters,
# @params with sprintf and sets the $self->{ ERROR } variable with the 
# resulting string.  This is then available via the public error() method.
# The package variable, $ERROR, is also set so that the error can be 
# determined when the constructor fails (and hence there would be no $self
# in which to store $self->{ ERROR }).  Calling error() as a package 
# function, rather than an object method, triggers this response.
#
# If $errmsg is undefined, the $self->{ ERROR } variable is undefined to
# effectively clear any previous error condition.
#
#========================================================================

sub _error {
    my $self = shift;
    my $msg  = shift;

    $self->{ ERROR } = $ERROR = defined ($msg)
	? sprintf($msg, @_)
	: undef;
}



1;