/usr/local/CPAN/CPANXR/CPANXR/Parser/Perl.pm
# $Id: Perl.pm,v 1.42 2003/10/04 22:51:00 clajac Exp $
package CPANXR::Parser::Perl;
use CPANXR::Database;
use CPANXR::Parser qw(:constants);
use PPI::Tokenizer;
use Carp qw(carp croak);
use strict;
our @ISA = qw(CPANXR::Parser);
my $Symbol = qr/\S?[A-Za-z_][A-Za-z0-9_]*(?:(?:\:\:|\')[A-Za-z0-9_]+)*/;
my @Prefix = qw($ @ % * & :);
sub new {
my ($pkg, $file, %args) = @_;
$pkg = ref($pkg) || $pkg;
bless {
file_id => $args{file_id},
dist_id => $args{dist_id},
file => $file
}, $pkg;
}
sub parse {
my $self = shift;
my $tokenizer = PPI::Tokenizer->load($self->{file});
my $tokens = $tokenizer->all_tokens or die "Can't tokenizer $self->{file}";
set_positions($tokens);
my $idx = 0;
my %packages;
my %func;
my $current_package = "";
my $current_package_id;
my $current_caller_sub_id = undef;
my $bracket_balance = 0;
my $sym_id;
my @conn;
my %Pkg;
@$tokens = grep { !$_->isa('PPI::Token::Whitespace') } @$tokens;
# Columns starts at 0
for(@$tokens) { $_->{_cpanxr}->[1]--; }
TOKENS: while ($idx < @$tokens) {
my $token = $tokens->[$idx];
if ($token->isa('PPI::Token::Bareword')) {
last TOKENS if($token->content eq '__END__' && $token->{_cpanxr}->[1] == 0);
my $pre = $tokens->[$idx - 1];
my $post = $tokens->[$idx + 1];
# Handle package declarations
if ($pre && $pre->isa('PPI::Token::Bareword') && $pre->content eq 'package') {
if ($post && $post->isa('PPI::Token::Structure') && $post->content eq ';') {
# Is package declaration
$sym_id = CPANXR::Database->insert_symbol($token->content);
$self->connect($sym_id, $token, 0, $current_package_id, undef, undef, CONN_PACKAGE);
$current_package_id = $sym_id;
next TOKENS;
}
} elsif ($token->content eq 'package' && $post->isa('PPI::Token::Bareword')) {
next TOKENS;
}
# Handle use and require declarations
if ($pre && $pre->isa('PPI::Token::Bareword') && $pre->content =~ /^use|require|no$/) {
my $name = $token->content;
$sym_id = CPANXR::Database->insert_symbol($name);
$self->connect($sym_id, $token, 0, undef, $current_package_id, undef, CONN_INCLUDE);
my $pkg_id = $sym_id;
if ($pre->content eq 'use') {
# continue til ;
my $check = $tokens->[++$idx];
while ($check && !($check->isa('PPI::Token::Structure') && $check->content eq ';')) {
if ($check->isa('PPI::Token::Quote::Single')) {
} elsif ($check->isa('PPI::Token::Quote::Words')) {
my ($preqw, $parse) = $check->content =~ /^(qw\s*.)(.*)$/;
my $preqw_len = length($preqw);
chop $parse;
my @lines = split/\n/,$parse;
$check->{_cpanxr}->[1] += $preqw_len;
for my $line (@lines) {
while($line =~ m/(\s*?)($Symbol)/gc) {
my $imp_name = $2;
my $prefix = substr($imp_name, 0, 1);
unless(grep { $prefix eq $_ } @Prefix) {
my $pos = pos($line);
if($prefix =~ /^[A-Za-z_]$/) { $prefix = "" } else { $imp_name = substr($imp_name, 1); }
$func{$imp_name} = $pkg_id;
my $sym_id = CPANXR::Database->insert_symbol($imp_name);
my $offset = $pos - length($imp_name) + length($prefix);
if($name eq 'base') {
$self->connect($sym_id, $check, $offset, undef, $current_package_id, undef, CONN_ISA);
} else {
$self->connect($sym_id, $check, $offset, $pkg_id, $current_package_id, undef, CONN_REF);
}
}
}
$check->{_cpanxr}->[1] = 0;
$check->{_cpanxr}->[0]++;
}
}
$check = $tokens->[++$idx];
}
}
next TOKENS;
} elsif ($token->content =~ /^use|require|no$/ &&
($post->isa('PPI::Token::Bareword') ||
$post->isa('PPI::Token::Number'))) {
next TOKENS;
}
# Handle sub declarations
if ($pre && $pre->isa('PPI::Token::Bareword') && $pre->content eq 'sub') {
if ($post
&& ($post->isa('PPI::Token::Structure') && $post->content =~ /\{|\;/)
|| ($post->isa('PPI::Token::Operator') && $post->content eq ':')
|| ($post->isa('PPI::Token::SubPrototype'))) {
# Is sub declarations
my ($pkg, $offset, $name) = make_symbol($token->content);
my $pkg_id = $current_package_id;
if($pkg) {
$pkg_id = CPANXR::Database->insert_symbol($pkg);
$self->connect($pkg_id, $token, 0, $pkg_id, undef, undef, CONN_REF);
}
$sym_id = CPANXR::Database->insert_symbol($name);
$self->connect($sym_id, $token, $offset, $pkg_id, undef, undef, CONN_DECL);
# Make this sub the current caller sub
$current_caller_sub_id = $sym_id;
next TOKENS;
}
}
# Handle my, local and our
if ($token->content =~ /^our|my|local$/) {
if ($post && $post->isa('PPI::Token::Bareword')) {
$sym_id = CPANXR::Database->insert_symbol($post->content);
$self->connect($sym_id, $post, 0, undef, $current_package_id, undef, CONN_REF);
$idx++;
}
next TOKENS;
}
# Handle $v{BAREWORD} and $v->{BAREWORD}
if ($pre && $pre->isa('PPI::Token::Structure') && $pre->content eq '{') {
if ($post && $post->isa('PPI::Token::Structure') && $post->content eq '}') {
next TOKENS;
}
}
# Handle stringification of left operatnd in => assignment
if ($post && $post->isa('PPI::Token::Operator') && $post->content eq '=>') {
next TOKENS;
}
# Skip <BAREWORD>
if ($pre && $pre->isa('PPI::Token::Operator') && $pre->content eq '<' &&
$post && $post->isa('PPI::Token::Operator') && $post->content eq '>') {
next TOKENS;
}
# D'oh... locate method or function call
if (is_method($pre)) {
$pre = $tokens->[$idx - 2];
my $key = $idx - 2;
my $pkg_id = $Pkg{$key};
if ($pre && !$pre->isa('PPI::Token::Bareword')) {
# Method cal
my ($pkg, $offset, $name) = make_symbol($token->content);
if ($pkg) {
$sym_id = CPANXR::Database->insert_symbol($pkg);
$self->connect($sym_id, $token, 0, undef, $current_package_id, undef, CONN_REF);
}
$sym_id = CPANXR::Database->insert_symbol($name);
$self->connect($sym_id, $token, $offset, $pkg_id, $current_package_id, $current_caller_sub_id, CONN_METHOD);
next TOKENS;
} else {
$sym_id = CPANXR::Database->insert_symbol($token->content);
$self->connect($sym_id, $token, 0, $pkg_id, $current_package_id, $current_caller_sub_id, CONN_METHOD);
next TOKENS;
}
next TOKENS;
} elsif (is_method($post)) {
if ($token->content !~ /^shift|__PACKAGE__$/) {
$sym_id = CPANXR::Database->insert_symbol($token->content);
$self->connect($sym_id, $token, 0, undef, $current_package_id, undef,CONN_REF);
$Pkg{$idx} = $sym_id;
}
next TOKENS;
}
unless(_perl_reserved($token->content)) {
my $pkg_id = undef;
my $check_imp = 1;
my ($pkg, $offset, $name) = make_symbol($token->content);
if ($pkg) {
$pkg_id = CPANXR::Database->insert_symbol($pkg);
$self->connect($pkg_id, $token, 0, undef, $current_package_id, undef, CONN_REF);
$check_imp = 0;
} else {
if ($post && $post->isa('PPI::Token::Bareword')) {
unless(_perl_reserved($post->content)) {
$pkg_id = CPANXR::Database->insert_symbol($post->content);
$self->connect($pkg_id, $post, 0, undef, $current_package_id, undef, CONN_REF);
$idx++;
}
$check_imp = 0;
}
}
$pkg_id = $func{$name} if($check_imp && exists $func{$name});
$sym_id = CPANXR::Database->insert_symbol($name);
$self->connect($sym_id, $token, $offset, $pkg_id, $current_package_id, $current_caller_sub_id, CONN_FUNCTION);
}
} elsif ($token->isa('PPI::Token::Symbol')) {
if ($token->content =~ /^\&(.*)$/) {
# Tata!
my ($pkg, $offset, $name) = make_symbol($1);
my $pkg_id = $pkg ? CPANXR::Database->insert_symbol($pkg) : $current_package_id;
$self->connect($pkg_id, $token, 1, undef, $current_package_id, undef, CONN_REF) if($pkg);
if(!$pkg) {
$pkg_id = $func{$name} if(exists $func{$name});
}
my $sym_id = CPANXR::Database->insert_symbol($name);
$self->connect($sym_id, $token, $offset + 1, $pkg_id, $current_package_id, $current_caller_sub_id, CONN_FUNCTION);
} elsif ($token->content eq '@ISA') {
my $next = $tokens->[$idx + 1];
if ($next->isa('PPI::Token::Operator') && $next->content eq '=') {
$next = $tokens->[$idx + 2];
if ($next->isa('PPI::Token::Quote::Words')) {
my ($preqw, $parse) = $next->content =~ /^(qw\s*.)(.*)$/;
my $preqw_len = length($preqw);
chop $parse;
my @lines = split/\n/,$parse;
$next->{_cpanxr}->[1] += $preqw_len;
for(@lines) {
while(m/(\s*)($Symbol)/gc) {
my $sym_id = CPANXR::Database->insert_symbol($2);
my $offset = pos() - length($2);
$self->connect($sym_id, $next, $offset, undef, $current_package_id, undef, CONN_ISA);
}
$next->{_cpanxr}->[1] = 0;
$next->{_cpanxr}->[0]++;
}
$idx += 2;
}
}
}
}
} continue {
my $token = $tokens->[$idx];
if($token && $token->isa('PPI::Token::Structure')) {
if($token->content eq '{') {
$bracket_balance++;
} elsif($token->content eq '}') {
$bracket_balance--;
if($bracket_balance < 0) {
$current_caller_sub_id = undef;
$bracket_balance = 0;
}
}
}
$idx++;
}
my @source = $self->slurp_file;
return scalar @source;
}
sub is_method {
my $token = shift;
return 0 unless $token;
return 0 unless $token->isa('PPI::Token::Operator');
return 0 unless $token->content eq '->';
return 1;
}
# make_symbol separates FQ symbols to package + symbol
# it also converts Perl4 style package delimiter (') to Perl5
# style ::
sub make_symbol {
my $symbol = shift;
my ($pkg, $offset, $sym);
if ($symbol =~ /^(.*(?:\:\:|\'))(.*)$/) {
$pkg = $1;
$offset = length($pkg);
$sym = $2;
$pkg =~ s/(?:\:\:|\')$//;
} else {
$pkg = "";
$offset = 0;
$sym = $symbol;
}
return ($pkg, $offset, $sym);
}
my @Reserved = qw/
local my for foreach continue do
eval if unless elsif last next
return goto while defined undef keys
values each pop push shift unshift
sort grep map print printf sprintf
open close substr length sub our
ref quotemeta caller else wantarray die
warn join STDERR STDIN STDOUT SUPER __PACKAGE__
BEGIN DESTROY INIT CHECK END tie
untie bless exists delete
lc uc lcfirst ucfirst
/;
sub _perl_reserved {
my $sym = shift;
return 0 if ($sym =~ /^\-/);
foreach (@Reserved) {
return 1 if $sym eq $_;
}
return 0;
}
# finds token positions, provided by Adam Keneny author of PPI.
# Thanks Adam!
sub set_positions {
my @tokens = UNIVERSAL::isa( $_[0], 'ARRAY' ) ? @{shift()} : return undef;
return undef if grep { ! UNIVERSAL::isa( $_, 'PPI::Token' ) } @tokens;
# Set the initial position. Start at line 1, column 1.
my $line = 1;
my $column = 1;
foreach my $token ( @tokens ) {
# This token is located at the current position
$token->{_cpanxr} = [ $line, $column ];
# Does the token contain any newlines
if ( $token->{content} =~ /\n/ ) {
# For each newline in the content, increment the line
while ( $token->{content} =~ m/\n/g ) {
$line++;
}
# Get the bit of the content AFTER the last newline
$token->{content} =~ /(?<=\n)([^\n]*)$/ or die "This shouldn't fail";
# Since there was at least one newline, reset the column.
# To that, add the length of the last bit.
$column = 1 + length $1;
} else {
# Add the token content length to the column
$column += length $token->{content};
}
# Position is now updated for the next token
}
# All the tokens have their position set, accessible from your original argument
return 1;
}
1;