/usr/local/CPAN/Lingua-LinkParser-MatchPath/Lingua/LinkParser/MatchPath/Lex.pm


package Lingua::LinkParser::MatchPath::Lex;

use strict;
use Exporter::Lite;
use Lex;

our @tokens = (
	       # word class
	       POS => '_[pavding]_',
	       WORD_REGEXP => '(?:/.+?/)[i]?',
	       WORD => '\w+',

	       # label class
	       LABEL_REGEXP => '<\/.+?\/>',
	       LABEL => '<.+?>',

	       POUND_SIGN => '#(?=[<(])',
	       EXCLM_SIGN => '!(?=[<\w\/(])',
	       AT_SIGN => '@(?=[<(])',

	       NEWLINE => '\n',
	       EOR => ';',
	       LPAREN => '[(]',
	       RPAREN => '[)]',
#	       QM => '[?]',
#	       AND => '[&,]',
	       OR => '[|]',
	       PERCENT => '[%]',
	       COMMENT => '^\s*#.+?$',

	       ERROR => '.+',
	   );

sub new {
    my $class = shift;
    my %opt = @_;
    bless {
	lexer => Lex->new(@tokens),
	debug => $opt{debug},
    }, $class;
}

sub _get_tokens {
    my $self = shift;
    my $token;
    my ($name, $content);
    while($token = $self->{lexer}->nextToken){
	($name, $content) = ($token->name(), $token->get);
	$name =~ s/.+:://;
	$content =~ s/\n$// if $name =~ /EOR$/;
	die "Error occurred during tokenizing text: ( $content )" if $name =~ /ERROR/;
	last unless $token->name =~ /(?:NEWLINE|COMMENT)$/;
    }
    if (not $self->{lexer}->eof) {
	[ $name, $content ];
    }
}

# post-processing
sub _pp_tokens {
    my $self = shift;
    my $token = $self->{token};
    my @token;
    for ( my $i = 0; $i<@$token; ){
	# one-step matching
	if(
	   $token->[$i][0] =~ /^(?:POUND|EXCML|AT)_SIGN$/o &&
	   $token->[$i+1][0] =~ /^LABEL/o
	   ){
	    push
		@token,
		$token->[$i],
		[ 'LPAREN' => '(' ],
		$token->[$i+1],
		$token->[$i+2],
		[ 'RPAREN' => ')' ];
		$i+=3;
	}
	# append '@' if there is none before '('
	elsif(
	      $token->[$i][0] eq 'LPAREN' &&
	      $token->[$i-1][0] =~ /^(?:WORD|POS)/
	      ){
	    push 
		@token,
                [ 'AT_SIGN' => '@' ],
                $token->[$i];
	    $i+=1;

	}
	else {
	    push @token, $token->[$i];
	    $i++;
	}
    }
    $self->{token} = \@token;
}

sub load {
    my $self = shift;
    $self->{lexer}->from(shift);
    while( my $t = $self->_get_tokens() ){
	push @{$self->{token}}, $t;
    }
    $self->_pp_tokens;
}


sub lex {
    my $self = shift;
    my $t = shift @{$self->{token}};
    if( $t->[0] ){
	printf ("   - %-15s ==> %s\n", @{$t}[1,0]) if $self->{debug};
	return @$t;
    }
    ('', undef);
}