/usr/local/CPAN/CPANXR/CPANXR/Parser/Perl.pm


# $Id: Perl.pm,v 1.42 2003/10/04 22:51:00 clajac Exp $

package CPANXR::Parser::Perl;
use CPANXR::Database;
use CPANXR::Parser qw(:constants);
use PPI::Tokenizer;
use Carp qw(carp croak);
use strict;

our @ISA = qw(CPANXR::Parser);

my $Symbol = qr/\S?[A-Za-z_][A-Za-z0-9_]*(?:(?:\:\:|\')[A-Za-z0-9_]+)*/;
my @Prefix = qw($ @ % * & :);

sub new {
  my ($pkg, $file, %args) = @_;
  $pkg = ref($pkg) || $pkg;

  bless {
	 file_id => $args{file_id},
	 dist_id => $args{dist_id},
	 file => $file
	}, $pkg;
}

sub parse {
  my $self = shift;
  my $tokenizer = PPI::Tokenizer->load($self->{file});
  my $tokens = $tokenizer->all_tokens or die "Can't tokenizer $self->{file}";

  set_positions($tokens);

  my $idx = 0;
  my %packages;
  my %func;
  my $current_package = "";
  my $current_package_id;
  my $current_caller_sub_id = undef;
  my $bracket_balance = 0;
  my $sym_id;
  my @conn;
  my %Pkg;

  @$tokens = grep { !$_->isa('PPI::Token::Whitespace') } @$tokens;

  # Columns starts at 0
  for(@$tokens) { $_->{_cpanxr}->[1]--; }

 TOKENS: while ($idx < @$tokens) {
    my $token = $tokens->[$idx];
    if ($token->isa('PPI::Token::Bareword')) {
      last TOKENS if($token->content eq '__END__' && $token->{_cpanxr}->[1] == 0);

      my $pre = $tokens->[$idx - 1];
      my $post = $tokens->[$idx + 1];

      # Handle package declarations
      if ($pre && $pre->isa('PPI::Token::Bareword') && $pre->content eq 'package') {
	if ($post && $post->isa('PPI::Token::Structure') && $post->content eq ';') {
	  # Is package declaration
	  $sym_id = CPANXR::Database->insert_symbol($token->content);
	  $self->connect($sym_id, $token, 0, $current_package_id, undef, undef, CONN_PACKAGE);
	  $current_package_id = $sym_id;
	  next TOKENS;
	}
      } elsif ($token->content eq 'package' && $post->isa('PPI::Token::Bareword')) {
	next TOKENS;
      }

      # Handle use and require declarations
      if ($pre && $pre->isa('PPI::Token::Bareword') && $pre->content =~ /^use|require|no$/) {
	my $name = $token->content;
	$sym_id = CPANXR::Database->insert_symbol($name);
	$self->connect($sym_id, $token, 0, undef, $current_package_id, undef, CONN_INCLUDE);
	my $pkg_id = $sym_id;
	if ($pre->content eq 'use') {
	  # continue til ;
	  my $check = $tokens->[++$idx];
	  while ($check && !($check->isa('PPI::Token::Structure') && $check->content eq ';')) {
	    if ($check->isa('PPI::Token::Quote::Single')) {
	    } elsif ($check->isa('PPI::Token::Quote::Words')) {
	      my ($preqw, $parse) = $check->content =~ /^(qw\s*.)(.*)$/;
	      my $preqw_len = length($preqw);
	      chop $parse;
	    
	      my @lines = split/\n/,$parse;
	      $check->{_cpanxr}->[1] += $preqw_len;
	      for my $line (@lines) {
		while($line =~ m/(\s*?)($Symbol)/gc) {
		  my $imp_name = $2;
		  my $prefix = substr($imp_name, 0, 1);
		  unless(grep { $prefix eq $_ } @Prefix) {
		    my $pos = pos($line);
		    if($prefix =~ /^[A-Za-z_]$/) { $prefix  = "" } else { $imp_name = substr($imp_name, 1); }
		    $func{$imp_name} = $pkg_id;
		    my $sym_id = CPANXR::Database->insert_symbol($imp_name);
		    my $offset = $pos - length($imp_name) + length($prefix);

		    if($name eq 'base') {
		      $self->connect($sym_id, $check, $offset, undef, $current_package_id, undef, CONN_ISA);
		    } else {
		      $self->connect($sym_id, $check, $offset, $pkg_id, $current_package_id, undef, CONN_REF);
		    }
		  }
		}
		$check->{_cpanxr}->[1] = 0;
		$check->{_cpanxr}->[0]++;
	      }
	    }
	    $check = $tokens->[++$idx];
	  }
	}

	next TOKENS;
      } elsif ($token->content =~ /^use|require|no$/ &&
	       ($post->isa('PPI::Token::Bareword') ||
	        $post->isa('PPI::Token::Number'))) {
	next TOKENS;
      } 

      # Handle sub declarations
      if ($pre && $pre->isa('PPI::Token::Bareword') && $pre->content eq 'sub') {
	if ($post
	    && ($post->isa('PPI::Token::Structure') && $post->content =~ /\{|\;/)
	    || ($post->isa('PPI::Token::Operator') && $post->content eq ':') 	    
	    || ($post->isa('PPI::Token::SubPrototype'))) {

	  # Is sub declarations
	  my ($pkg, $offset, $name) = make_symbol($token->content);

	  my $pkg_id = $current_package_id;
	  if($pkg) {
	    $pkg_id = CPANXR::Database->insert_symbol($pkg);
	    $self->connect($pkg_id, $token, 0, $pkg_id, undef, undef, CONN_REF);	    
	  }

	  $sym_id = CPANXR::Database->insert_symbol($name);
	  $self->connect($sym_id, $token, $offset, $pkg_id, undef, undef, CONN_DECL);

	  # Make this sub the current caller sub
	  $current_caller_sub_id = $sym_id;

	  next TOKENS;
	}
      }
      
      # Handle my, local and our
      if ($token->content =~ /^our|my|local$/) {
	if ($post && $post->isa('PPI::Token::Bareword')) {
	  $sym_id = CPANXR::Database->insert_symbol($post->content);
	  $self->connect($sym_id, $post, 0, undef, $current_package_id, undef, CONN_REF);
	  $idx++;
	}

	next TOKENS;
      }

      # Handle $v{BAREWORD} and $v->{BAREWORD}
      if ($pre && $pre->isa('PPI::Token::Structure') && $pre->content eq '{') {
	if ($post && $post->isa('PPI::Token::Structure') && $post->content eq '}') {
	  next TOKENS;
	}
      }

      # Handle stringification of left operatnd in => assignment
      if ($post && $post->isa('PPI::Token::Operator') && $post->content eq '=>') {
	next TOKENS;
      }

      # Skip <BAREWORD>
      if ($pre && $pre->isa('PPI::Token::Operator') && $pre->content eq '<' && 
	  $post && $post->isa('PPI::Token::Operator') && $post->content eq '>') {
	next TOKENS;
      }

      # D'oh... locate method or function call
      if (is_method($pre)) {
	$pre = $tokens->[$idx - 2];
	my $key = $idx - 2;
	my $pkg_id = $Pkg{$key};
	if ($pre && !$pre->isa('PPI::Token::Bareword')) {
	  # Method cal
	  my ($pkg, $offset, $name) = make_symbol($token->content);
	  if ($pkg) {
	    $sym_id = CPANXR::Database->insert_symbol($pkg);
	    $self->connect($sym_id, $token, 0, undef, $current_package_id, undef, CONN_REF);
	  }

	  $sym_id = CPANXR::Database->insert_symbol($name);
	  $self->connect($sym_id, $token, $offset, $pkg_id, $current_package_id, $current_caller_sub_id, CONN_METHOD);
	  next TOKENS;	  
	} else {
	  $sym_id = CPANXR::Database->insert_symbol($token->content);
	  $self->connect($sym_id, $token, 0, $pkg_id, $current_package_id, $current_caller_sub_id, CONN_METHOD);
	  next TOKENS;
	}
	next TOKENS;
      } elsif (is_method($post)) {
	if ($token->content !~ /^shift|__PACKAGE__$/) {
	  $sym_id = CPANXR::Database->insert_symbol($token->content);
	  $self->connect($sym_id, $token, 0, undef, $current_package_id, undef,CONN_REF);
	  $Pkg{$idx} = $sym_id;
	}
	next TOKENS;
      }

      unless(_perl_reserved($token->content)) {       
	my $pkg_id = undef;
	my $check_imp = 1;
	my ($pkg, $offset, $name) = make_symbol($token->content);
	if ($pkg) {
	  $pkg_id = CPANXR::Database->insert_symbol($pkg);
	  $self->connect($pkg_id, $token, 0, undef, $current_package_id, undef, CONN_REF);
	  $check_imp = 0;
	} else {
	  if ($post && $post->isa('PPI::Token::Bareword')) {
	    unless(_perl_reserved($post->content)) {
	      $pkg_id = CPANXR::Database->insert_symbol($post->content);
	      $self->connect($pkg_id, $post, 0, undef, $current_package_id, undef, CONN_REF);
	      $idx++;
	    }
	    $check_imp = 0;
	  }
	}

	$pkg_id = $func{$name} if($check_imp && exists $func{$name});
	$sym_id = CPANXR::Database->insert_symbol($name);
	$self->connect($sym_id, $token, $offset, $pkg_id, $current_package_id, $current_caller_sub_id, CONN_FUNCTION);
      }
    } elsif ($token->isa('PPI::Token::Symbol')) {
      if ($token->content =~ /^\&(.*)$/) {
        # Tata!
	my ($pkg, $offset, $name) = make_symbol($1);

	my $pkg_id = $pkg ? CPANXR::Database->insert_symbol($pkg) : $current_package_id;
	$self->connect($pkg_id, $token, 1, undef, $current_package_id, undef, CONN_REF) if($pkg);
	
	if(!$pkg) {
	  $pkg_id = $func{$name} if(exists $func{$name});
	}

	my $sym_id = CPANXR::Database->insert_symbol($name);
	$self->connect($sym_id, $token, $offset + 1, $pkg_id, $current_package_id, $current_caller_sub_id, CONN_FUNCTION);
      } elsif ($token->content eq '@ISA') {
	my $next = $tokens->[$idx + 1];
	if ($next->isa('PPI::Token::Operator') && $next->content eq '=') {
	  $next = $tokens->[$idx + 2];

	  if ($next->isa('PPI::Token::Quote::Words')) {
	    my ($preqw, $parse) = $next->content =~ /^(qw\s*.)(.*)$/;
	    my $preqw_len = length($preqw);
	    chop $parse;
	    
	    my @lines = split/\n/,$parse;
	    $next->{_cpanxr}->[1] += $preqw_len;
	    for(@lines) {
	      while(m/(\s*)($Symbol)/gc) {
		my $sym_id = CPANXR::Database->insert_symbol($2);
		my $offset = pos() - length($2);
		$self->connect($sym_id, $next, $offset, undef, $current_package_id, undef, CONN_ISA);
	      }
	      $next->{_cpanxr}->[1] = 0;
	      $next->{_cpanxr}->[0]++;
	    }

	    $idx += 2;
	  }
	}
      }
    }
  } continue {
    my $token = $tokens->[$idx];
    if($token && $token->isa('PPI::Token::Structure')) {
      if($token->content eq '{') {
	$bracket_balance++;
      } elsif($token->content eq '}') {
	$bracket_balance--;
	if($bracket_balance < 0) {
	  $current_caller_sub_id = undef;	  
	  $bracket_balance = 0;
	}
      }
    }
    $idx++; 
  }

    my @source = $self->slurp_file;
  return scalar @source;
}

sub is_method {
  my $token = shift;
  return 0 unless $token;
  return 0 unless $token->isa('PPI::Token::Operator');
  return 0 unless $token->content eq '->';
  return 1;
}

# make_symbol separates FQ symbols to package + symbol
# it also converts Perl4 style package delimiter (') to Perl5
# style ::

sub make_symbol {
  my $symbol = shift;

  my ($pkg, $offset, $sym);

  if ($symbol =~ /^(.*(?:\:\:|\'))(.*)$/) {
    $pkg = $1;
    $offset = length($pkg);
    $sym = $2;
    $pkg =~ s/(?:\:\:|\')$//;
  } else {
    $pkg = "";
    $offset = 0;
    $sym = $symbol;
  }

  return ($pkg, $offset, $sym);
}

my @Reserved = qw/
  local my for foreach continue do 
  eval if unless elsif last next
  return goto while defined undef keys 
  values each pop push shift unshift
  sort grep map print printf sprintf
  open close substr length sub our
  ref quotemeta caller else wantarray die
  warn join STDERR STDIN STDOUT SUPER __PACKAGE__
  BEGIN DESTROY INIT CHECK END tie
  untie bless exists delete
  lc uc lcfirst ucfirst
/;

sub _perl_reserved {
  my $sym = shift;
  return 0 if ($sym =~ /^\-/);
  foreach (@Reserved) {
    return 1 if $sym eq $_;
  }
  return 0;
}

# finds token positions, provided by Adam Keneny author of PPI.
# Thanks Adam!

sub set_positions {
  my @tokens = UNIVERSAL::isa( $_[0], 'ARRAY' ) ? @{shift()} : return undef;
  return undef if grep { ! UNIVERSAL::isa( $_, 'PPI::Token' ) } @tokens;

  # Set the initial position. Start at line 1, column 1.
  my $line = 1;
  my $column = 1;

  foreach my $token ( @tokens ) {
    # This token is located at the current position
    $token->{_cpanxr} = [ $line, $column ];

    # Does the token contain any newlines
    if ( $token->{content} =~ /\n/ ) {
      # For each newline in the content, increment the line
      while ( $token->{content} =~ m/\n/g ) {
	$line++;
      }

      # Get the bit of the content AFTER the last newline
      $token->{content} =~ /(?<=\n)([^\n]*)$/ or die "This shouldn't fail";

      # Since there was at least one newline, reset the column.
      # To that, add the length of the last bit.
      $column = 1 + length $1;

    } else {
      # Add the token content length to the column
      $column += length $token->{content};
    }

    # Position is now updated for the next token
  }

  # All the tokens have their position set, accessible from your original argument
  return 1;
}	

1;