/usr/local/CPAN/Bio-ConnectDots/Bio/ConnectDots/Parser.pm
package Bio::ConnectDots::Parser;
use vars qw(@ISA @AUTO_ATTRIBUTES @OTHER_ATTRIBUTES %SYNONYMS %DEFAULTS);
use strict;
use Text::Balanced qw(extract_delimited extract_bracketed extract_quotelike);
use Class::AutoClass;
@ISA = qw(Class::AutoClass);
@AUTO_ATTRIBUTES=qw();
%SYNONYMS=();
@OTHER_ATTRIBUTES=qw();
%DEFAULTS=();
Class::AutoClass::declare(__PACKAGE__);
# constraints =
# constraint
# constraint separator constraint ...
# separator = any non-word character or 'and'
sub parse_constraints {
my($self,$text,$want_tree)=@_;
my($constraint,$rest);
my $constraints=[];
while ($text) {
($constraint,$rest)=$self->parse_constraint($text,$want_tree);
last unless $constraint;
push(@$constraints,$constraint);
$rest=~s/^[\s,;]*(AND)*[\s,;]*//is; # consume separator
$text=$rest;
}
my $result;
if (@$constraints) {
$want_tree? $result={match=>$constraints}: $result=$constraints;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
# constraint =
# constant
# op constant
# term constant
# term op constant
# try longest first.
sub parse_constraint {
my($self,$text,$want_tree)=@_;
my($term,$op,$constant,$rest_term,$rest,$rest);
($term,$rest_term)=$self->parse_term($text,$want_tree);
goto FAIL unless $term;
# try parsing 'op constant'
($op,$rest)=$self->parse_op($rest_term,$want_tree);
goto SUCCESS if $op=~/exists/i; # no constant needed
if ($op) {
($constant,$rest)=$self->parse_constant($rest,$want_tree);
if ($constant) {
goto SUCCESS;
}
}
# try parsing just 'constant'
($constant,$rest)=$self->parse_constant($rest_term,$want_tree);
goto FAIL unless $constant;
SUCCESS:
my $result={term=>$term,op=>$op,constant=>$constant};
$result={match=>$result} if $want_tree;
return wantarray? ($result,$rest): $result;
FAIL:
return wantarray? (undef,$text): undef;
}
# joins =
# join
# join separator join ...
# separator = any non-word character or 'and'
sub parse_joins {
my($self,$text,$want_tree)=@_;
my($join,$rest);
my $joins=[];
while ($text) {
($join,$rest)=$self->parse_join($text,$want_tree);
last unless $join;
push(@$joins,$join);
$rest=~s/^[\s,;]*(AND)*[\s,;]*//is; # consume separator
$text=$rest;
}
my $result;
if (@$joins) {
$want_tree? $result={match=>$joins}: $result=$joins;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
# join =
# term = term
sub parse_join {
my($self,$text,$want_tree)=@_;
my($term0,$rest)=$self->parse_term($text,$want_tree);
goto FAIL unless $term0;
$rest=~s/^\s*=+\s*//is; # consume separator
my($term1,$rest)=$self->parse_term($rest,$want_tree);
goto FAIL unless $term1;
SUCCESS:
my $result={term0=>$term0,term1=>$term1};
$result={match=>$result} if $want_tree;
return wantarray? ($result,$rest): $result;
FAIL:
return wantarray? (undef,$text): undef;
}
# aliases =
# alias
# alias separator alias ...
# separator = any non-word character
sub parse_aliases {
my($self,$text,$want_tree)=@_;
my($alias,$rest);
my $aliases=[];
while ($text) {
($alias,$rest)=$self->parse_alias($text,$want_tree);
last unless $alias;
push(@$aliases,$alias);
$rest=~s/^[\s,;]*//is; # consume separator
$text=$rest;
}
my $result;
if (@$aliases) {
$want_tree? $result={match=>$aliases}: $result=$aliases;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
# alias =
# name separator alias
# separator = any non-word character or AS
sub parse_alias {
my($self,$text,$want_tree)=@_;
my($target_name,$rest)=$self->parse_qword($text,$want_tree);
goto FAIL unless $target_name;
$rest=~s/^\s*AS\s*|[\s,;]*//is; # consume separator
my($alias_name,$rest)=$self->parse_qword($rest,$want_tree);
goto FAIL unless $alias_name;
SUCCESS:
my $result={target_name=>$target_name,alias_name=>$alias_name};
$result={match=>$result} if $want_tree;
return wantarray? ($result,$rest): $result;
FAIL:
return wantarray? (undef,$text): undef;
}
# term = term1 | term1.term1 | term1.term1.term1
# approximate this by list of any number of term1's
sub parse_term {
my($self,$text,$want_tree)=@_;
my($term1,$rest);
my $term=[];
while ($text) {
($term1,$rest)=$self->parse_term1($text,$want_tree);
last unless $term1;
push(@$term,$term1);
last unless $rest=~s/^\s*\.\s*//s; # done unless separator is '.'
$text=$rest;
}
my $result;
if (@$term) {
$result=$want_tree? {match=>$term}: $term;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
# term1='*' | word | quoted_phrase | list
sub parse_term1 {
my($self,$text,$want_tree)=@_;
$text.=' '; # append space because extract_quotelike doesn't
# handel q() if ) is last character of string
my($rule,$match,$rest,$prefix,$body,$skip);
$text=~s/^\s*//s; # strip leading spaces
if (($match,$rest)=$text=~/^(\*)(.*)/s) {
$rule='*';
} elsif (($match,$rest,$prefix,$skip,$skip,$body)=extract_quotelike($text),$match) {
$rule='quoted_phrase';
} elsif (($match,$rest,$prefix)=extract_bracketed($text,'[(q'),$match) {
$rule='list';
($match)=$match=~/^[\[\(](.*)[\)\]]$/s;
$match=$self->parse_term_list($match,$want_tree);
} elsif (($match,$rest)=$text=~/^(\w+)(.*)/s) {
$rule='word';
}
my $result;
if ($match) {
$match=$body if defined $body;
$result=$want_tree? {match=>$match,rule=>$rule}: $match;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
sub parse_term_list {
my($self,$text,$want_tree)=@_;
my($term1,$rest);
my $term=[];
while ($text) {
($term1,$rest)=$self->parse_term1($text,$want_tree);
last unless $term1;
push(@$term,$term1);
$rest=~s/^[\s,;]//s; # consume separator
$text=$rest;
}
my $result;
if (@$term) {
$result=$want_tree? {match=>$term}: $term;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
# term = term1 | term1.term1 | term1.term1.term1
# approximate this by list of any number of term1's
sub parse_term_value {
my($self,$text,$want_tree)=@_;
my($term1,$rest);
my $term=[];
while ($text) {
($term1,$rest)=$self->parse_term1_value($text,$want_tree);
last unless $term1;
push(@$term,$term1);
$text=$rest;
}
my $result;
if (@$term) {
$result=$want_tree? {match=>$term}: $term;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
# term1='*' | everything to . | list
sub parse_term1_value {
my($self,$text,$want_tree)=@_;
my($rule,$match,$rest,$prefix);
$text=~s/^\s*//s; # strip leading spaces
if (($match,$rest)=$text=~/^(\*)(.*)/s) {
$rule='*';
} elsif (($match,$rest,$prefix)=extract_bracketed($text,'[(q'),$match) {
$rule='list';
($match)=$match=~/^[\[\(](.*)[\)\]]$/s;
$match=$self->parse_term_list($match,$want_tree);
} elsif (($match,$rest)=$text=~/^(.*?)\.(.*)/s) {
$match=~s/\s*$//s; # strip trailing spaces
$rule='word';
} else {
$match=$text;
$rest='';
$rule='value';
}
my $result;
if ($match) {
$result=$want_tree? {match=>$match,rule=>$rule}: $match;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
# op = usual comparison ops | 'in'
sub parse_op {
my($self,$text,$want_tree)=@_;
$text=~s/^\s*//s; # strip leading spaces
my($match,$rest)=$text=~/^(exists|not\s*in|in|<=|==|!=|>=|<|=|>)(.*)/is; # longest patterns must be first
$match=uc($match);
$match='NOT IN' if $match=~/NOT\s*IN/;
$match='=' if $match eq '=='; # special case == for benefit of Perl programmers
my $result;
if ($match) {
$result=$want_tree? {match=>$match,rule=>'op'}: $match;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
# constant = word | quoted_phrase | list
sub parse_constant {
my($self,$text,$want_tree)=@_;
$text.=' '; # append space because extract_quotelike doesn't
# handel q() if ) is last character of string
my($rule,$match,$rest,$prefix,$body,$skip);
$text=~s/^\s*//s; # strip leading spaces
if (($match,$rest,$prefix,$skip,$skip,$body)=extract_quotelike($text),$match) {
$rule='quoted_phrase';
} elsif (($match,$rest,$prefix)=extract_bracketed($text,'[(q'),$match) {
$rule='list';
($match)=$match=~/^[\[\(](.*)[\)\]]$/s;
$match=$self->parse_constant_list($match,$want_tree);
} elsif (($match,$rest)=$text=~/^([\w\.]+)\W*(.*)/s) {
$rule='word';
}
my $result;
if ($match) {
$match=$body if defined $body;
$result=$want_tree? {match=>$match,rule=>$rule}: $match;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
sub parse_constant_list {
my($self,$text,$want_tree)=@_;
my($constant1,$rest);
my $constants=[];
while ($text) {
($constant1,$rest)=$self->parse_constant($text,$want_tree);
last unless $constant1;
push(@$constants,$constant1);
$text=$rest;
}
my $result;
if (@$constants) {
$result=$want_tree? {match=>$constants}: $constants;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
# constant_value = entire string | list
sub parse_constant_value {
my($self,$text,$want_tree)=@_;
my($rule,$match,$rest,$prefix,$body,$skip);
if (($match,$rest,$prefix)=extract_bracketed($text,'[(q'),$match) {
$rule='list';
($match)=$match=~/^[\[\(](.*)[\)\]]$/s;
$match=$self->parse_constant_list($match,$want_tree);
} else{
$match=$text;
$rest='';
$rule='value';
}
my $result;
if ($match) {
$result=$want_tree? {match=>$match,rule=>$rule}: $match;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
# outputs =
# output
# output separator output ...
# separator = any non-word character
sub parse_outputs {
my($self,$text,$want_tree)=@_;
my($output,$rest);
my $outputs=[];
while ($text) {
($output,$rest)=$self->parse_output($text,$want_tree);
last unless $output;
push(@$outputs,$output);
$rest=~s/^[\s,;]*//is; # consume separator
$text=$rest;
}
my $result;
if (@$outputs) {
$want_tree? $result={match=>$outputs}: $result=$outputs;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
# output =
# word | word.word, optionally followed by 'AS' name
sub parse_output {
my($self,$text,$want_tree)=@_;
my($output1,$rest,$output_name);
my $output=[];
while ($text) {
($output1,$rest)=$self->parse_qword($text,$want_tree);
last unless $output1;
push(@$output,$output1);
last unless $rest=~s/^\s*\.\s*//s; # done unless separator is '.'
$text=$rest;
}
goto FAIL unless @$output;
if ($rest=~s/^\W*AS\W*//is) { # consume separator and
# parse output_name if separator is 'as'
($output_name,$rest)=$self->parse_qword($rest,$want_tree);
goto FAIL unless $output_name;
}
SUCCESS:
my $result={termlist=>$output,output_name=>$output_name};
$result={match=>$result} if $want_tree;
return wantarray? ($result,$rest): $result;
FAIL:
return wantarray? (undef,$text): undef;
}
# qword = word | quoted_phrase
sub parse_qword {
my($self,$text,$want_tree)=@_;
$text.=' '; # append space because extract_quotelike doesn't
# handel q() if ) is last character of string
my($rule,$match,$rest,$prefix,$body,$skip);
$text=~s/^\s*//s; # strip leading spaces
if (($match,$rest,$prefix,$skip,$skip,$body)=extract_quotelike($text),$match) {
$rule='quoted_phrase';
} elsif (($match,$rest)=$text=~/^(\w+)(.*)/s) {
$rule='word';
}
my $result;
if ($match) {
$match=$body if defined $body;
$result=$want_tree? {match=>$match,rule=>$rule}: $match;
} else {
$rest=$text;
}
wantarray? ($result,$rest): $result;
}
1;