/usr/local/CPAN/VirtualFS-ISO9660/VirtualFS/ISO9660.pm
#!/usr/bin/perl -l
package VirtualFS::ISO9660;
require 5.005_003; # only tested on 5.8.0.
use strict;
use warnings;
use Scalar::Util qw(dualvar);
use File::Spec;
use Carp qw(carp croak);
use Fcntl ':mode';
use Symbol; # need geniosym
# for debugging
#require Data::Dumper;
our $VERSION = 0.02;
our ($SEPARATOR_1, $SEPARATOR_2, $A_CHARACTERS, $D_CHARACTERS);
{ no strict 'vars';
*SEPARATOR_1 = \ '.';
*SEPARATOR_2 = \ ';';
*D_CHARACTERS = \ '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_';
*A_CHARACTERS = \ q# !"%&'()*+,-./0123456789:;<=>?ABCDEFGHIJKLMNOPQRSTUVWXYZ_#;
}
# see ECMA-119 for official ISO9660 format (available free of charge)
# http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-119.pdf
use constant { CDROM_SECTOR_SIZE => 2048, VOLUME_DESCRIPTOR_SECTOR => 16 };
sub new {
my $class = shift;
my $filename = shift or croak "No filename specified for " . __PACKAGE__ . "->new";
my %options = @_; # rest is in hash format
CORE::open (my $fh, '<', $filename) or return; # let *them* handle open failures!
binmode $fh;
my $buffer;
# try not to croak() unless it's the fault of the caller.
# that means, among other things, simply return undef (indicating an error)
# when the format of the ISO is invalid.
# read the boot-record volume descriptor
__readsectors($fh, $buffer, VOLUME_DESCRIPTOR_SECTOR) or return;
my $voldesc = __extract_voldesc($buffer);
# read the path table
# the path table is, for whatever reason, a brief listing of every directory
# on the disc. There are efefctively three copies of this; one has its integers
# MSB-first, one has them LSB-first, and the third would be the actual complete
# pile of directory entries.
__readsectors($fh, $buffer, $voldesc->{lpathlocation},
int (($voldesc->{pathtablesize} + CDROM_SECTOR_SIZE - 1) / CDROM_SECTOR_SIZE));
my $pathtree = __build_pathtree(__extract_pathtable($buffer, $voldesc->{pathtablesize}));
#print Data::Dumper::Dumper($pathtree);
bless [$fh, $voldesc, $pathtree], $class;
}
# open a fake directory handle. $dirh->readdir() will do what you think it would.
# opendir(dirh, path);
# opendir(dirh, '/foo/bar/baz') opens /foo/bar/baz
# opendir(dirh, '/foo/bar/baz/') opens /foo/bar/baz
# opendir(dirh, 'foo/bar/baz') opens /foo/bar/baz
sub opendir {
my $this = shift;
my $loc;
my $treepos = $this->[2];
my (undef, $path) = @_;
my @parts = grep {!/^$/} File::Spec->splitdir($path); # ignore blank parts
if (@parts) {
for (@parts) {
unless ($treepos = $treepos->[1]{+uc}) {
$! = "Path part not found: $_";
return;
}
}
$loc = $treepos->[0];
} else {
# treat the root directory specially
$loc = $this->[1]{rootdir}{location};
}
# FIXME: use File::Spec
$_[0] = VirtualFS::ISO9660::DirHandle->__new($this->[0], $loc, $this, join('/', @parts) );
}
sub open {
my $this = shift;
croak "need 3-argument open" unless @_ == 3;
croak "2nd arg must be '<'" unless $_[1] eq '<';
my @stats = $this->stat($_[2]) or croak "can't stat $_[2]: $!";
croak "can't open() a directory" if S_ISDIR($stats[2]);
$_[0] = Symbol::geniosym();
tie( *{$_[0]}, 'VirtualFS::ISO9660::FileHandle', $this->[0], $stats[1], $this)
and return 1;
}
sub stat {
my $this = shift;
my $filename = uc shift; # note the call to uc; ISO9660 names are all UPPERCASE
my $ref;
my $version;
# FIXME: use File::Spec
$filename = '/'.$filename unless $filename =~ m#^/#;
if ($filename =~ s/;(.*)//) {
$version = $1-1;
}
unless (exists($this->[4]{$filename})) {
my (undef, $path, undef) = File::Spec->splitpath($filename);
$this->opendir(my $dirh, $path) or croak "can't open path $path: $!";
() = $dirh->readdir(); # in list context -- this will read thru the entire dir, populating the cache
croak "can't find file $filename" unless exists($this->[4]{$filename});
}
$ref = $this->[4]{$filename};
unless (defined($version)) { $version = $#$ref; }
croak "version $version of $filename doesn't exist" unless defined $ref->[$version];
$ref = $ref->[$version][1];
return $this->__stat($ref);
}
# ============================================================
# accessors
# ============================================================
# $o->identifier()
# returns a hash containing the keys 'system', 'volume',
# 'volume_set', 'publisher', 'preparer', and 'application',
# as well as their corresponding values (of course).
# $o->identifier(key)
# assuming 'key' matches one of the above keys, returns the
# value for that key.
# $o->identifier(key1, key2, key3)
# assuming that key1,key2,key3 each match one of the above keys,
# returns a list containing the values for those keys, in the
# same order.
sub identifier {
my $this = shift;
if (@_ == 0) {
# return a hashref
my %h;
@h{'system', 'volume', 'volume_set', 'publisher', 'preparer', 'application'} =
@{$this->[1]}{'system_id', 'volume_id', 'volume_set_id', 'publisher_id', 'preparer_id', 'application_id'};
return %h;
} else {
my @list = @{$this->[1]}{ map "$_\_id", @_ };
return wantarray?@list:pop@list;
}
}
# $o->id_file()
# See the 'identifier' method; only, the keys here are:
# 'copyright', 'abstract', and 'biblio'.
sub id_file {
my $this = shift;
if (@_ == 0) {
# return a hashref
my %h;
@h{'copyright', 'abstract', 'biblio'} =
@{$this->[1]}{'copyright_file', 'abstract_file', 'biblio_file'};
return %h;
} else {
my @list = @{$this->[1]}{ map "$_\_file", @_ };
return wantarray?@list:pop@list;
}
}
# $o->extract_file()
# $o->extract_file('/COPYRIGH', 'to-file');
# This is done using CORE::open on the to-file, which means that
# in perl 5.8.0 you can do:
# $o->extract_file('/COPYRIGH', \$scalar);
# and the contents of the file will be extracted into $scalar.
sub extract_file {
my $this = shift;
croak 'usage: extract_file(iso-filename, output-filename)' unless @_>=2;
my $from = shift;
my $to = shift;
$this->open(my $infh, '<', $from) or return; # eh, right now open() will croak anyway.
CORE::open(my $outfh, '>', $to);
local $\; # don't let $\ screw with us
while(read($infh, my $buf, 4096)) { print $outfh $buf; }
}
# ============================================================
# internal functions
# ============================================================
# read a sector or sectors from the image
# usage: __readsectors(filehandle, buffer, start[, count])
# count defaults to 1 if not specified. And don't specify a 0.
#
# on success, returns 1 (a partial read is considered failure)
# on failure, returns undef
sub __readsectors {
my $count = $_[3] || 1;
unless (seek($_[0], $_[2] * CDROM_SECTOR_SIZE, 0)) { return }
my $ret = read($_[0], $_[1], $count * CDROM_SECTOR_SIZE);
unless ($ret == $count * CDROM_SECTOR_SIZE) { return }
return 1;
}
# path table record (ECMA-119 section 9.4)
# see extract_direntry and extrapolate for basic use
sub __extract_pathtablerec {
my %h;
my $sref = ref($_[0])?$_[0]:\$_[0];
my $len = unpack('C', $$sref);
@h{'LEN-EAR', 'location', 'parent', 'name'} =
unpack("x C V v A$len x![v]", $$sref);
if (ref $_[0]) {
my $totallen = 1 + 1 + 4 + 2 + $len + ($len&1);
${$_[1]} -= $totallen if ref $_[1];
substr($$sref, 0, $totallen, '');
}
return \%h;
}
# extract_pathtable($scalar, $pathtablesize)
# extracts all the path table entries from $scalar
# also, there'd sure as hell better be $pathtablesize bytes worth of entries
# in there...
# in scalar context, returns an arrayref
sub __extract_pathtable {
my @table;
my $data = shift;
my $left = shift;
push @table, __extract_pathtablerec(\$data, \$left)
while $left>0;
return \@table;
}
# build_pathtree(\@array)
# returns a convenient hashref of all the directories.
sub __build_pathtree {
my $h;
my @hrefs;
my $i=0;
for (@{$_[0]}) {
unless (@hrefs) { # special case: the root directory
$hrefs[0] = $h = [$_->{parent}];
$i++;
next;
}
$hrefs[$_->{parent} - 1][1]{ $_->{name} } =
$hrefs[$i] = [ $_->{location} ];
$i++;
}
return $h;
}
# directory record (ECMA-119 section 9.1)
# extract_direntry($scalar)
# returns a happy hashref.
#
# alternatively, you can do:
# __extract_direntry(\$scalar)
# which, in addition to returning the hashref, eats the directory
# entry out of $scalar.
sub __extract_direntry {
my %h;
my $sref = ref($_[0])?$_[0]:\$_[0]; # make sure we have a reference to ease unpacking
@h{'LEN-DR', 'LEN-EAR', 'location', 'size', 'time', 'flags', 'unitsize',
'gapsize', 'volseqnum', 'name'} = unpack(
'C C Vx[N] Vx[N] a7 C C C vx[n] C/a', $$sref);
# if they gave us a reference, eat the data out of the scalar.
if (ref $_[0]) { substr($$sref, 0, $h{'LEN-DR'}, ''); }
return \%h;
}
# volume descriptor (ECMA-119 section 8)
# __extract_voldesc($scalar)
sub __extract_voldesc {
my %h;
@h{'type', 'stdid', 'version'} =
unpack('CA5C', $_[0]);
# how we grok the rest depends on the type.
# 0=Boot record
# 1=Primary volume descriptor
# 2=Supplementary volume descriptor
# 3=Volume partition descriptor
# 4-254=RFU
# 255=Volume descriptor set terminator
if ($h{type} == 0) {
# section 8.2: boot record
@h{'sysid','bootid'} = unpack('x7A32A32', $_[0]);
} elsif ($h{type} == 1) {
# section 8.4: primary volume descriptor
@h{'system_id', 'volume_id', 'size', 'setsize', 'seqnum', 'blocksize',
'pathtablesize', 'lpathlocation', 'optlpathlocation',
#'mpathlocation', 'optmpathlocation',
'rootdir',
'volume_set_id', 'publisher_id', 'preparer_id', 'application_id',
'copyright_file', 'abstract_file', 'biblio_file',
'create_time', 'modify_time', 'expire_time', 'effective_time',
'format_version'} = unpack(q{
x7 # skip over the 7 bytes we pulled out at the very beginning
x # byte 8 is RFU and should be 0 in the Primary Volume Descriptor
# (probably for alignment purposes)
A32 # System Identifier
A32 # Volume Identifier
x8 # RFU, should be 0
V # Volume Space Size
x[N] # Volume Space Size again, only in Motorola order
x32 # another RFU
vx[n] # Volume Set Size and its motorola form
vx[n] # Volume Sequence Number
vx[n] # Logical Block Size
Vx[N] # Path Table Size
V # Type L path table location
V # Type L path table location (Optional)
x[N] # Type M path table location
x[N] # Type M path table location (Optional)
a34 # 'Directory Record for Root Directory' (??? wtf?)
A128 # Volume Set Identifier
A128 # Publisher Identifier
A128 # Data Preparer Identifier
A128 # Application Identifier
A37 # Copyright File Identifier
A37 # Abstract File Identifier
A37 # Bibliographic File Identifier
a17 # Volume Creation Timestamp
a17 # Volume Modification Timestamp
a17 # Volume Expiration Timestamp
a17 # Volume Effective Timestamp
C # File Structure Version
x # RFU
}, $_[0]);
$h{rootdir} = __extract_direntry($h{rootdir});
} elsif ($h{type} == 2) {
# section 8.5, Supplementary Volume Descriptor
# gahhhh...
} elsif ($h{type} == 3) {
# section 8.6, Volume Partition Descriptor
@h{'sysid', 'partition_id', 'partition_location', 'partition_size'} =
unpack('x7xA32A32Vx[N]Vx[N]', $_[0]);
}
return \%h;
}
# $obj->__startpos('/path/to/filename')
# returns the offset into the .ISO file where you can find the contents of that
# file (for debugging purposes).
sub __startpos {
my $this = shift;
my @x = $this->stat($_[0]);
return undef unless @x; # no data? give up.
# $x[1] will point to the info object
return ($x[1]{location} * CDROM_SECTOR_SIZE);
}
sub __stat {
my $this = shift;
my $ref = shift;
my $perms = S_IRUSR|S_IRGRP|S_IROTH; # everybody can read
# nobody can write (ISO9660 is readonly)
# nobody can execute (how's it gonna be executed?)
if ($ref->{flags} & 2) {
$perms |= S_IFDIR;
} else {
$perms |= S_IFREG;
}
return (
$this, # "device number", return this object
$ref, # "inode number", return the cache ref
$perms, # permissions
1, # number of hard links
0, # uid
0, # gid
0, # rdev(???)
$ref->{size}, # size
0, # atime
0, # mtime
0, # ctime
CDROM_SECTOR_SIZE, # blksize
int(($ref->{size} + CDROM_SECTOR_SIZE - 1) / CDROM_SECTOR_SIZE), # block count
);
}
package VirtualFS::ISO9660::DirHandle;
use Scalar::Util qw(dualvar);
use constant { CDROM_SECTOR_SIZE => 2048 };
*__extract_direntry = \&VirtualFS::ISO9660::__extract_direntry;
# new (iso_filehandle, sector, ISO9660 object, pathname)
# pathname won't start with '/', nor will it end with one.
sub __new {
my $class = shift;
my ($fromfh, $sector, $parent, $name) = @_;
CORE::open(my $fh, '<&', $fromfh) or return;
seek($fh, $sector * CDROM_SECTOR_SIZE, 0);
# / (root) and /any/dir/here are different in that the former
# ends in /, while the latter does not. This causes confusion.
# FIXME: use File::Spec
$name = '/'.$name if $name ne '';
bless [$fh, $sector, 0, $parent, $name, undef], $class;
}
sub rewinddir {
my $this = shift;
$this->[2] = 0;
seek($this->[0], $this->[1] * CDROM_SECTOR_SIZE, 0);
}
# merely for completeness
sub closedir {}
sub readdir {
if (wantarray) {
my $this = shift;
my @x;
my $x;
push @x, $x while $x=$this->__readdir;
return @x;
} else {
goto &__readdir;
}
}
sub __readdir {
# $this->
# [0] = filehandle of ISO image
# [1] = sector to start at
# [2] = byte offset within directory
# [3] = VirtualFS::ISO9660 object that spawned us (used for caching)
# [4] = path of directory, parts separated by '/' and ending with '/'
# [5] = total size of directory, undef if we don't know it yet.
my ($buf, $len);
# check EOF (err, EOD)
return if (defined($_[0][5]) && $_[0][5] <= $_[0][2]);
read($_[0][0], $len, 1)==1 or return; # find out the size of the entry
$len = unpack('C',$len);
return unless $len; # I can't find what officially marks the end,
# but this seems to work
seek($_[0][0], -1, 1) or return;
my $where = tell($_[0][0]);
read($_[0][0], $buf, $len)==$len or return;
$_[0][2] += $len;
my $info = __extract_direntry($buf);
# cache the location of this file for future reference
# if there's a version (;<number>), extract it.
if ($info->{name} =~ s/;(.*)//) {
$info->{version} = $1-1;
} else {
$info->{version} = dualvar(0, ''); # this is equivalent to, but distinguishable from, an explicit version of 1.
}
$info->{name} =~ s/\.$//; # remove any trailing .'s
# if $this->[5] is undef, then this is the very first entry in the directory.
if ($info->{name} eq "\c@") {
$_[0][5] = $info->{size};
$info->{name} = '.';
if ($_[0][4] eq '') {
# special case to cache the root directory
$_[0][3][4]{'/'}[$info->{version}] = [$where, $info];
}
} elsif ($info->{name} eq "\cA") {
$info->{name} = '..';
} else {
# not a special name; cache this entry.
# FIXME: use File::Spec
$_[0][3][4]{$_[0][4] . '/' . $info->{name}}[$info->{version}] = [$where, $info];
}
return $info->{name};
}
1;
package VirtualFS::ISO9660::FileHandle;
use constant { CDROM_SECTOR_SIZE => 2048 };
# TIEHANDLE (iso_filehandle, info, ISO9660 object)
sub TIEHANDLE {
my $class = shift;
my ($fromfh, $info, $parent) = @_;
open(my $fh, '<&', $fromfh) or return;
seek($fh, $info->{location} * CDROM_SECTOR_SIZE, 0) or return;
bless [$fh, $info, $parent,
$info->{location} * CDROM_SECTOR_SIZE, # byte 0 is here
$info->{location} * CDROM_SECTOR_SIZE + $info->{size} # EOF is here
], $class;
}
# no need to support WRITE -- the ISO format is read-only except when it's being
# built from scratch.
# Same goes for PRINT and PRINTF.
# We need: READ, READLINE, and GETC.
sub GETC {
my $this = shift;
my $ret;
my $where = tell($this->[0]);
# if we're "outside" the file, fail
return undef unless $where >= $this->[3] && $where < $this->[4];
read($this->[0], $ret, 1) == 1 or return;
return $ret;
}
sub READ {
my $this = shift;
# READ(buffer, len, offset)
my (undef,$len,$ofs) = @_;
$ofs = 0 unless defined($ofs);
my $b = \$_[0];
# don't read past the end of our virtual file!
if ($len > $this->[4] - tell($this->[0])) { $len = $this->[4] - tell($this->[0]); }
# if $len ends up being 0 bytes, bail
return 0 unless $len>0;
return read($this->[0], $$b, $len, $ofs);
}
# My wish: That Perl_do_readline (pp_hot.c) was nice enough to provide readline()
# on tied filehandles by falling back to $obj->READ. This would do two things:
# -> Simplify this object
# -> As it is presently implemented, future extensions to how <$fh> handles
# $RS or $/ won't work here, as we are effectively reimplementing
# Perl_do_readline() here. If Perl_do_readline() worked by calling our
# READ method, however, it would work fine.
sub __READLINE {
my $buf;
my $len = 0;
my $rlen;
# read 4K of data at a time until we get something or run out of file.
$rlen = $len = READ($_[0], $buf, 4096);
until ($rlen==0 || (defined($/) && $buf =~ m[\Q$/]g)) { # the g makes perl set pos()
$len += ($rlen = READ($_[0], $buf, 4096, $len));
}
return undef if ($len == 0); # no more file!
return $buf if ($rlen == 0); # we ate the rest of the file!
$rlen = pos($buf);
substr($buf, $rlen, $len-$rlen, ''); # eat the rest of the buffer
seek($_[0][0], $rlen-$len, 1); # and fix the file position
return $buf;
}
sub READLINE {
if (wantarray) {
my @lines;
my $line;
push @lines, $line while defined($line = $_[0]->__READLINE);
return @lines;
}
goto &__READLINE;
}
sub STAT {
my $this = shift;
return $this->[2]->__stat($this->[1]);
}