Python::Serialise::Pickle - a file for reading and writing pickled Python files


Python-Serialise-Pickle documentation Contained in the Python-Serialise-Pickle distribution.

Index


Code Index:

NAME

Top

Python::Serialise::Pickle - a file for reading and writing pickled Python files

SYNOPSIS

Top



    use Python::Serialise::Pickle;

    my $pr = Python::Serialise::Pickle->new("file/for/reading");
    while (my $data = $pr->load()) {
          print Dumper $data;
    }

    my $pw = Python::Serialise::Pickle->new(">file/for/writing");

    $pw->dump(['a' 'list']);
    $pw->dump("a string");
    $pw->dump(42);
    $pw->dump({'a'=>'hash'});

    $pw->close();  

DESCRIPTION

Top

Pickling is a method of serialising files in Python (another method, Marshalling, is also available).

This module is an attempt to write a pure Perl implementation of the algorithm.

METHODS

Top

new <filename>

Open a file for reading or writing. Can take any arguments that IO::File can.

load

Returns the next data structure from the pickle file or undef.

dump <data structure>

Takes a ref to an array or a hash or a number or string and pickles it.

Structures may be nested.

close

Closes the current file.

BUGS

Top

Almost certainly lots and lots.

Serialised objects

At the moment we don't deal with serialised objects very well. Should probably just take or return a Python::Serialise::Pickle::Object object.

The 'None' object

Similar to Perl's undef but an object. At the moment we deal with it badly because if we returned undef then that would signify the end of the Pickle file.

Should probably be returned as a special object or something.

Longs

There's no testing for longs

Unicode

Ditto

Some nested dictionaries

Dictionaries are the Python equivalent of hashes. This module can deal with most nested dictionaries but, for some reason, this one :

	a={'a':['two',{'goof':'foo', 'a':[1,2,3]}]}

causes it to fail.

Chnaging it slightly starts it working again.

Bad reading of specs

This is entirely my fault

ALTERNATIVES

Top

You could always dump the data structure out as YAML in Python and then read it back in with YAML in Perl.

AUTHOR

Top

Simon Wistow <simon@thegestalt.org>

COPYRIGHT

Top

SEE ALSO

Top

http://www.python.org, YAML, IO::File and the RESOURCES file in this distribution.


Python-Serialise-Pickle documentation Contained in the Python-Serialise-Pickle distribution.
package Python::Serialise::Pickle;

use strict;
use Data::Dumper;
use IO::File;
use vars qw($VERSION);

$VERSION   = '0.01';



my %_set_handlers = (
	'NUMBER' => \&_set_num,
	'STRING' => \&_set_string,
	'HASH'   => \&_set_dict,
	'ARRAY'  => \&_set_list,
);


my %_get_handlers = ( 
		'I' => \&_get_num,
		'L' => \&_get_num,
		'F' => \&_get_num,
		'S' => \&_get_string,
		'N' => \&_get_none,
		'l' => \&_get_list,
		'd' => \&_get_dict,
		'c' => \&_get_raw,
		'p' => \&_get_id,
		'i' => \&_get_raw,
		'(' => \&_get_compound,
);


sub new {
	my $class = shift;
	my $file  = shift || die "You must pass a file\n";

	## FIXME error here
	my $fh    = IO::File->new($file) || die "Couldn't open file\n";
	my $self = { _fh => $fh };

	return bless $self, $class;
	
}

sub load {
	my $self = shift;
	$self->{_cur_id} = 0;

	print "LOAD\n";

	my $id = $self->_get_char();
	return undef if (!defined $id or $id eq "");

	my $sub = $_get_handlers{$id}    || die "We have no handler to deal with '$id'\n";
	return $self->$sub();

}


sub dump {
	my $self = shift;
	my $val  = shift;



	my $sub = $_set_handlers{$self->_type($val)};
	
	my $line = $self->$sub($val);
	$line .= ".";
	
	$self->_write($line);
	return $line;

}


sub _backup {
	my $self = shift;
	print "BACKUP\n";
	$self->{_fh}->seek(-1,1);
}


sub _get_char
{
	my $self = shift;
	$self->{_fh}->read(my $data, 1);
	print "C=$data\n";

	return $data;
}

sub _get_line {
	my $self = shift;
	my $line = "";

	while (1) {
		my $char = $self->_get_char();
		last unless defined $char;
		last if $char eq "\n";
		$line .= $char;
	}

	return $line;
}

sub _write {
	my $self = shift;
	my $val  = shift;

	$self->{_fh}->write($val);
}


sub _get_num 
{
	my $self = shift;
	my %opts = @_;

	my $num  = $self->_get_line();

	unless (defined $opts{'ignore_end_marker'} && $opts{'ignore_end_marker'} == 1) {
		$self->_get_char();
	}


	return $num;	
}

sub _set_num 
{
	my $self = shift;
	my $num  = shift;
	my %opts = @_;


	my $return;
	if (int $num != $num) {
		$return =  "F$num\n";
	} else {
		$return = "I$num\n";
	}

	 $return .= $opts{'terminator'} if ($opts{'terminator'});

	return $return;
}


sub _get_string 
{
	my $self = shift;
	my %opts = @_;


	my $string = $self->_get_line();
	$string =~ s!^(['"])(.*)['"]$!$2!;	
	$string =~ s!"!\\"!g;
	eval "\$string = \"$string\";";

	my $id     = $self->_get_id();
	die "No id!\n" unless defined $id;

	unless (defined $opts{'ignore_end_marker'} && $opts{'ignore_end_marker'} == 1) {
		$self->_get_char();
	}


	return $string;
	

}

sub _set_string
{
	my $self = shift;
	my $string = shift;
	my %opts = @_;

	# escape some control chars
	$string =~ s{
                                (.)
                        }{
                                (ord($1)<33 || ord($1)>126)?sprintf '\\%.3o',ord($1):$1
                        }sxeg;
	
	my $return = "S";
	if ($string =~ /^'.+'$/) {
		$return .= "\"$string\"\n";
	} else {
		$return .= "'$string'\n";
	}
	


	$return .= $self->_set_id();
	$return .= $opts{'terminator'} if ($opts{'terminator'});


	return $return;

}


sub _get_id {
	my $self = shift;
	my %opts = @_;

	my $char = $self->_get_char();
	die "Got $char - was expecting 'p' for id"  unless $char eq 'p';
	return $self->_get_line();
}

sub _set_id {
	my $self = shift;
	
	my $id = $self->{_cur_id}++;
	return "p$id\n";
}

sub _get_list {
	my $self = shift;
	my %opts = @_;

	print "GET LIST\n";

	my $oid   = $self->_get_id();
	my @vals;

	while (1) {	
		my $id = $self->_get_char();
		if ($id eq '.' || $id eq 's' || $id eq 'a') {
			last;
		}
		if ($id eq 'g') {
			my $tmp = $self->_get_line();
			last;
		}	

		if ($id eq 't') {
			$self->_backup;
			last;
		}

		
		my $sub = $_get_handlers{$id};
		print "ID=$id\n";
		$opts{'ignore_end_marker'}=0;
		die "No handler for '$id' in get_list ",(join ",",@vals),""  unless defined $sub;
		push @vals, $self->$sub(%opts);
		

	}	
	print "END LIST\n";
	return \@vals;
}

sub _set_list {
	my $self = shift;
	my $list = shift;
	my %opts = @_;
	
	my $terminator = $opts{'terminator'} || "";

	my $return = "";
	$return   .= "(" unless ($opts{ignore_compound});
	$return   .= "l";
	$return   .= $self->_set_id();	

	$opts{'terminator'} = 'a';

	foreach my $val (@$list) {
		my $sub = $_set_handlers{$self->_type($val)};
                die "No handler to set '$val'" unless defined $sub;
                $return .= $self->$sub($val, %opts);

	}

	$return .= $terminator;
	return $return;

}



sub _get_compound {
        my $self = shift;
	my %opts = @_;


	my $id = $self->_get_char();
	
	if ($id eq 'l') { 
		$self->_get_list(%opts);
	} elsif ($id eq 'd') { 
		$self->_get_dict(%opts);
	} else {
		$self->_backup();
		$self->_get_tuple(%opts);
	}
}


sub _get_tuple {
	my $self = shift;
	my %opts = @_;
	my $last_id = $opts{'last_tuple_marker'} || ".";

	my @vals;

	print "GET TUPLE\n";	

        while (1) {
                my $id = $self->_get_char();
	   	if ($id eq '.' || $id eq 'a' || $id eq 's') {
                        last;
                }

		if ($id eq 'g') {
			$self->_get_line();
			next;
		}
		
		if ($id eq 't') {
			$self->_get_id();
			print "Getting ID\n";
			if ($opts{'ignore_end_marker'}) {	
				last;
			} else {
				next;
			}
		}
	        
                my $sub = $_get_handlers{$id};
                die "No handler for type '$id'" unless defined $sub;
                push @vals, $self->$sub(ignore_end_marker=>1);
                
        }
	print "END TUPLE\n";
        return \@vals;
}


sub _get_dict
{
	my $self = shift;
	my %opts = @_;
	my %dict;

	print "GET DICT\n";
	print "IGNORE END MARKER = ",$opts{'ignore_end_marker'},"\n";
	#$opts{'ignore_end_marker'}=1;

		
	my $id = $self->_get_id();



	while (1) {
                my $key_id = $self->_get_char();
		
		


		if ($key_id eq '.')  {
			$self->_backup() if $opts{'ignore_end_marker'};
			last;
		}
		last if ($key_id eq 's' || $key_id eq 'a');
			
		if ($key_id eq 'g') {
			my $tmp = $self->_get_line();
                        next;
		}


                my $key_sub = $_get_handlers{$key_id};
                die "No handler for key '$key_id'" unless defined $key_sub;
		
		my $key = $self->$key_sub( ignore_end_marker => 1);
		
		print "GOT KEY\n";		
			
                my $val_id = $self->_get_char();
                my $val_sub = $_get_handlers{$val_id};
                die "No handler for value '$val_id'" unless defined $val_sub;
		my $val =  $self->$val_sub(%opts);

		$dict{$key} = $val;		



	}
	print "END DICT\n";
	

	return \%dict;

}

sub _set_dict 
{
	my $self = shift;
	my $hash  = shift;
        my %opts = @_;

	my $return = "";
        $return   .= "(";
        $return   .= "d";
        $return   .= $self->_set_id();
	

	$opts{'ignore_compound'}   = 0;
	$opts{'ignore_end_marker'} = 1;
	$opts{'terminator'}        = "";

	foreach my $key (keys %{$hash}) {
		my $val = $hash->{$key};
		
		my $keysub = $_set_handlers{$self->_type($key)};
                die "No handler for setting key '$key'" unless defined $keysub;
                $return .= $self->$keysub($key, %opts);

		my $valsub = $_set_handlers{$self->_type($val)};
                die "No handler for setting val '$val'" unless defined $valsub;
                $return .= $self->$valsub($val, %opts);
				

		$return .= "s";
	}	

	return $return;

}





sub _get_none {
	my $self = shift;
	return $self->_get_raw;

}

sub _get_raw {
	my $self = shift;
	$self->_backup;


	my $val = "";
	while (1) {
		my $char = $self->_get_char();	
		last if ($char eq ".");
		$val .= $char;
		$val .= $self->_get_line();
	}
	return $val;
		
}

sub _type {
	my $self = shift;
	my $val = shift;

	my $ref = ref $val;

	return $ref if defined $ref && $ref ne "";

	return "NUMBER" if ($val =~ /^-?(?:\d+(?:\.\d*)?|\.\d+)$/);
	return "STRING";

}


sub close {
	my $self = shift;
	$self->{_fh}->close();
}

sub DESTROY {
	my $self = shift;
	$self->close();
}


1;