B::Disassembler - Disassemble Perl bytecode


B-C documentation Contained in the B-C distribution.

Index


Code Index:

NAME

Top

B::Disassembler - Disassemble Perl bytecode

SYNOPSIS

Top

	use Disassembler qw(print_insn);
        my $fh = new FileHandle "<$ARGV[0]";
	disassemble_fh($fh, \&print_insn);

DESCRIPTION

Top

disassemble_fh takes an filehandle with bytecode and a printer function. The printer function gets three arguments: insn, arg (optional) and the comment.

See lib/B/Disassembler.pm and scripts/disassemble.

disassemble_fh (filehandle, printer_coderef, [ verbose ])

Top

disassemble_fh takes an filehandle with bytecode and a printer coderef.

Two default printer functions are provided:

  print_insn print_insn_bare

print_insn

Top

print_insn_bare

Top

get_header

Top

Returns the .plc header as array of

  ( magic, archname, blversion, ivsize, ptrsize,
    byteorder, longsize, archflag, perlversion )

in ARRAY context, or in SCALAR context the array from above as named hash.

magic is always "PLBC". "PLJC" is reserved for JIT'ted code also loaded via ByteLoader.

archname is the archname string and is in the ByteLoader up to 0.06 checked strictly. Starting with ByteLoader 0.06_05 platform compatibility is implemented by checking the $archflag, and doing byteorder swapping for same length longsize, and adjusting the ivsize and ptrsize.

blversion is the ByteLoader version from the header as string. Up to ByteLoader 0.06 this version must have matched exactly, since 0.07 earlier ByteLoader versions are also accepted in the ByteLoader.

ivsize matches $Config{ivsize} of the assembling perl. A number, 4 or 8 only supported.

ptrsize matches $Config{ptrsize} of the assembling perl. A number, 4 or 8 only supported.

longsize is $Config{longsize} of the assembling perl. A number, 4 or 8. Only since blversion 0.06_03.

byteorder is a string of "0x12345678" on big-endian or "0x78563412" (?) on little-endian machines. The beginning "0x" is stripped for compatibility with intermediate ByteLoader versions, i.e. 5.6.1 to 5.8.0, Added with blversion 0.06_03, and also with blversion 0.04. See BcVersions

archflag is a bitmask of opcode platform-dependencies. Currently used: bit 1 for USE_ITHREADS bit 2 for MULTIPLICITY Added with blversion 0.06_05.

perlversion $] of the perl which produced this bytecode as string. Added with blversion 0.06_06.

AUTHORS

Top

Malcolm Beattie MICB at cpan.org (retired), Reini Urban perl-compiler@googlegroups.com since 2008.


B-C documentation Contained in the B-C distribution.

#      Disassembler.pm
#
#      Copyright (c) 1996 Malcolm Beattie
#      Copyright (c) 2008,2009,2010,2011 Reini Urban
#
#      You may distribute under the terms of either the GNU General Public
#      License or the Artistic License, as specified in the README file.

$B::Disassembler::VERSION = '1.10';

package B::Disassembler::BytecodeStream;

use FileHandle;
use Carp;
use Config qw(%Config);
use B qw(cstring cast_I32);
@ISA = qw(FileHandle);

sub readn {
  my ( $fh, $len ) = @_;
  my $data;
  read( $fh, $data, $len );
  croak "reached EOF while reading $len bytes" unless length($data) == $len;
  return $data;
}

sub GET_U8 {
  my $fh = shift;
  my $c  = $fh->getc;
  croak "reached EOF while reading U8" unless defined($c);
  return ord($c);
}

sub GET_U16 {
  my $fh  = shift;
  my $str = $fh->readn(2);
  croak "reached EOF while reading U16" unless length($str) == 2;

  # Todo: check byteorder
  return unpack( "S", $str );
}

sub GET_NV {
  my $fh = shift;
  my ( $str, $c );
  while ( defined( $c = $fh->getc ) && $c ne "\0" ) {
    $str .= $c;
  }
  croak "reached EOF while reading double" unless defined($c);
  return $str;
}

sub GET_U32 {
  my $fh  = shift;
  my $str = $fh->readn(4);
  croak "reached EOF while reading U32" unless length($str) == 4;

  # Todo: check byteorder
  return unpack( "L", $str );
}

sub GET_I32 {
  my $fh  = shift;
  my $str = $fh->readn(4);
  croak "reached EOF while reading I32" unless length($str) == 4;

  # Todo: check byteorder
  return unpack( "l", $str );
}

sub GET_objindex {
  my $fh  = shift;
  my $str = $fh->readn(4);
  croak "reached EOF while reading objindex" unless length($str) == 4;

  # Todo: check byteorder
  return unpack( "L", $str );
}

sub GET_opindex {
  my $fh  = shift;
  my $str = $fh->readn(4);
  croak "reached EOF while reading opindex" unless length($str) == 4;

  # Todo: check byteorder
  return unpack( "L", $str );
}

sub GET_svindex {
  my $fh  = shift;
  my $str = $fh->readn(4);
  croak "reached EOF while reading svindex" unless length($str) == 4;

  # Todo: check byteorder
  return unpack( "L", $str );
}

sub GET_pvindex {
  my $fh  = shift;
  my $str = $fh->readn(4);
  croak "reached EOF while reading pvindex" unless length($str) == 4;

  # Todo: check byteorder
  return unpack( "L", $str );
}

sub GET_hekindex {
  my $fh  = shift;
  my $str = $fh->readn(4);
  croak "reached EOF while reading hekindex" unless length($str) == 4;

  # Todo: check byteorder
  return unpack( "L", $str );
}

sub GET_strconst {
  my $fh = shift;
  my ( $str, $c );
  $str = '';
  while ( defined( $c = $fh->getc ) && $c ne "\0" ) {
    $str .= $c;
  }
  croak "reached EOF while reading strconst" unless defined($c);
  return cstring($str);
}

sub GET_pvcontents { }

sub GET_PV {
  my $fh = shift;
  my $str;
  my $len = $fh->GET_U32;
  if ($len) {
    read( $fh, $str, $len );
    croak "reached EOF while reading PV" unless length($str) == $len;
    return cstring($str);
  }
  else {
    return '""';
  }
}

sub GET_comment_t {
  my $fh = shift;
  my ( $str, $c );
  while ( defined( $c = $fh->getc ) && $c ne "\n" ) {
    $str .= $c;
  }
  croak "reached EOF while reading comment" unless defined($c);
  return cstring($str);
}

sub GET_double {
  my $fh = shift;
  my ( $str, $c );
  while ( defined( $c = $fh->getc ) && $c ne "\0" ) {
    $str .= $c;
  }
  croak "reached EOF while reading double" unless defined($c);
  return $str;
}

sub GET_none { }

sub GET_op_tr_array {
  my $fh  = shift;
  my $len = unpack "S", $fh->readn(2);
  my @ary = unpack "S*", $fh->readn( $len * 2 );
  return join( ",", $len, @ary );
}

sub GET_IV64 {
  my $fh  = shift;
  my $str = $fh->readn(8);
  croak "reached EOF while reading I32" unless length($str) == 8;

  # Todo: check byteorder
  my $i = unpack( "q", $str );
  return $i > 8 ? sprintf "0x%09llx", $i : $i;
}

sub GET_IV {
  # Check the header settings, not the current settings.
  $B::Disassembler::ivsize == 4 ? &GET_I32 : &GET_IV64;
  # $Config{ivsize} == 4 ? &GET_I32 : &GET_IV64;
}

sub GET_PADOFFSET {
  # Check the header settings, not the current settings.
  $B::Disassembler::ptrsize == 8 ? &GET_IV64 : &GET_U32;
  # $Config{ptrsize} == 8 ? &GET_IV64 : &GET_U32;
}

sub GET_long {
  # Check the header settings, not the current settings.
  # B::Disassembler::ivsize or longsize if ge xxx?
  if ($B::Disassembler::longsize) {
    return $B::Disassembler::longsize == 8 ? &GET_IV64 : &GET_U32;
  } else {
    # return $Config{longsize} == 8 ? &GET_IV64 : &GET_U32;
    return $B::Disassembler::ivsize == 8 ? &GET_IV64 : &GET_U32;
  }
}

sub GET_pmflags {
  my $fh  = shift;
  my $size = 2;
  if ($B::Disassembler::blversion ge '"0.07"') {
    if ($B::Disassembler::perlversion ge '"5.013"') {
      return $fh->GET_U32;
    }
  }
  return $fh->GET_U16;
}

package B::Disassembler;
use Exporter;
@ISA       = qw(Exporter);
@EXPORT_OK = qw(disassemble_fh get_header print_insn print_insn_bare @opname);
use Carp;
use strict;
use B::Asmdata qw(%insn_data @insn_name);
use Opcode qw(opset_to_ops full_opset);
use Config qw(%Config);

BEGIN {
  if ( $] < 5.009 ) {
    B::Asmdata->import(qw(@specialsv_name));
  }
  else {
    B->import(qw(@specialsv_name));
  }
}

my $ix;
our @opname = opset_to_ops(full_opset);
our (
  $magic,   $archname, $blversion, $ivsize,
  $ptrsize, $longsize, $byteorder, $archflag, $perlversion
);
						# >=5.12
our  @svnames = ("NULL");			# 0
push @svnames, "BIND"   if $] >= 5.009;	# 1
push @svnames, ("IV", "NV");			# 2,3
push @svnames, "RV"     if $] < 5.011;		#
push @svnames, ("PV", "PVIV", "PVNV", "PVMG");	# 4-7
push @svnames, "BM"     if $] < 5.009;
push @svnames, "REGEXP" if $] >= 5.011;	# 8
push @svnames, "GV"     if $] >= 5.009;	# 9
push @svnames, ("PVLV", "AV", "HV", "CV");	# 10-13
push @svnames, "GV"     if $] < 5.009;
push @svnames, ("FM", "IO");			# 14,15

sub dis_header($) {
  my ($fh) = @_;
  my $str = $fh->readn(3);
  if ($str eq '#! ')  {
    $str .= $fh->GET_comment_t;
    $str .= $fh->GET_comment_t;
    $magic = $fh->GET_U32;
  } else {
    $str .= $fh->readn(1);
    $magic = unpack( "L", $str );
  }
  warn("bad magic") if $magic != 0x43424c50;
  $archname  = $fh->GET_strconst();
  $blversion = $fh->GET_strconst();
  $ivsize    = $fh->GET_U32();
  $ptrsize   = $fh->GET_U32();
  if ( $blversion ge '"0.06_03"' ) {
    $longsize = $fh->GET_U32();
  }
  if ( $blversion gt '"0.06"' or $blversion eq '"0.04"' ) {
    $byteorder = $fh->GET_strconst();
  }
  if ( $blversion ge '"0.06_05"' ) {
    $archflag = $fh->GET_U16();
  }
  if ( $blversion ge '"0.06_06"' ) {
    $perlversion = $fh->GET_strconst();
  }
}

sub get_header() {
  my @result = (
		$magic,   $archname,  $blversion, $ivsize,
		$ptrsize, $byteorder, $longsize,  $archflag,
		$perlversion
	       );
  if (wantarray) {
    return @result;
  }
  else {
    my $hash = {
		magic       => $magic,
		archname    => $archname,
		blversion   => $blversion,
		ivsize      => $ivsize,
		ptrsize     => $ptrsize,
	       };
    for (qw(magic archname blversion ivsize ptrsize byteorder
	    longsize archflag perlversion))
    {
	$hash->{$_} = $$_ if defined $$_;
    }
    return $hash;
  }
}

sub print_insn {
  my ( $insn, $arg, $comment ) = @_;
  undef $comment unless $comment;
  if ( defined($arg) ) {
    # threaded or unthreaded
    if ( $insn eq 'newopx' or $insn eq 'ldop' and $] > 5.007) {
      my $type = $arg >> 7;
      my $size = $arg - ( $type << 7 );
      $arg .= sprintf( " \t# size:%d, type:%d %s", $size, $type ) if $comment;
      printf "\n# [%s %d]\n", $opname[$type], $ix++ if $comment;
    }
    elsif ( !$comment ) {
      ;
    }
    elsif ( $insn eq 'stpv' ) {
      $arg .= "\t# " . $comment if $comment ne '1';
      printf "# -%s- %d\n", 'PV', $ix++;
    }
    elsif ( $insn eq 'newsvx' ) {
      my $type = $arg & 0xff; # SVTYPEMASK
      $arg .= sprintf("\t# type=%d,flags=0x%x", $type, $arg);
      $arg .= $comment if $comment ne '1';
      printf "\n# [%s %d]\n", $svnames[$type], $ix++;
    }
    elsif ( $insn eq 'gv_stashpvx' ) {
      $arg .= "\t# " . $comment if $comment ne '1';
      printf "\n# [%s %d]\n", "STASH", $ix++;
    }
    elsif ( $insn eq 'ldspecsvx' ) {
      $arg .= "\t# $specialsv_name[$arg]";
      $arg .= $comment if $comment ne '1';
      printf "\n# [%s %d]\n", "SPECIAL", $ix++;
    }
    elsif ( $insn eq 'ldsv' ) {
      $arg .= "\t# " . $comment if $comment ne '1';
      printf "# -%s-\n", 'GP/AV/HV/NULL/MG';
    }
    elsif ( $insn eq 'gv_fetchpvx' ) {
      $arg .= "\t# " . $comment if $comment ne '1';
      printf "\n# [%s %d]\n", 'GV', $ix++;
    }
    elsif ( $insn eq 'sv_magic' ) {
      $arg .= sprintf( "\t# '%s'", chr($arg) );
    }
    elsif ( $insn =~ /_flags/ ) {
      $arg .= sprintf( "\t# 0x%x", $arg );
    }
    elsif ( $insn eq 'op_type' and $] < 5.007 ) {
      my $type = $arg;
      $arg .= sprintf( "\t# [ %s ]", $opname[$type] );
    }
    else {
      $arg .= "\t# " . $comment if $comment ne '1';
    }
    printf "%s %s\n", $insn, $arg;
  }
  else {
    $insn .= "\t# " . $comment if $comment ne '1';
    print $insn, "\n";
  }
}

sub print_insn_bare {
  my ( $insn, $arg ) = @_;
  if ( defined($arg) ) {
    printf "%s %s\n", $insn, $arg;
  }
  else {
    print $insn, "\n";
  }
}

sub disassemble_fh {
  my $fh      = shift;
  my $out     = shift;
  my $verbose = shift;
  my ( $c, $getmeth, $insn, $arg );
  $ix = 1;
  bless $fh, "B::Disassembler::BytecodeStream";
  dis_header($fh);
  if ($verbose) {
    printf "#magic       0x%x\n", $magic; #0x43424c50
    printf "#archname    %s\n", $archname;
    printf "#blversion   %s\n", $blversion;
    printf "#ivsize      %d\n", $ivsize;
    printf "#ptrsize     %d\n", $ptrsize;
    printf "#byteorder   %s\n", $byteorder if $byteorder;
    printf "#longsize    %d\n", $longsize  if $longsize;
    printf "#archflag    %d\n", $archflag  if defined $archflag;
    printf "#perlversion %s\n", $perlversion if $perlversion;
    print "\n";
  }
  while ( defined( $c = $fh->getc ) ) {
    $c    = ord($c);
    $insn = $insn_name[$c];
    if ( !defined($insn) || $insn eq "unused" ) {
      my $pos = $fh->tell - 1;
      warn "Illegal instruction code $c at stream offset $pos.\n";
    }
    $getmeth = $insn_data{$insn}->[2];
    $arg     = $fh->$getmeth();
    if ( defined($arg) ) {
      &$out( $insn, $arg, $verbose );
    }
    else {
      &$out( $insn, undef, $verbose );
    }
  }
}

1;

__END__

# Local Variables:
#   mode: cperl
#   cperl-indent-level: 2
#   fill-column: 100
# End:
# vim: expandtab shiftwidth=2: