/usr/local/CPAN/DSlib/DS/Importer/TabStream.pm
#!perl
# ########################################################################## #
# Title: Tabular stream to datastream importer
# Creation date: 2007-03-05
# Author: Michael Zedeler
# Description: Produces a datastream from a tab separated stream (IO::Handle)
# Data Stream class
# Data importer
# File: $Source: /data/cvs/lib/DSlib/lib/DS/Importer/TabStream.pm,v $
# Repository: kronhjorten
# State: $State: Exp $
# Documentation: inline
# Recepient: -
# TODO Must store field order internally.
# ########################################################################## #
package DS::Importer::TabStream;
use base qw{ DS::Importer };
use strict;
use Carp::Assert;
our ($VERSION) = $DS::VERSION;
our ($REVISION) = '$Revision: 1.1 $' =~ /(\d+\.\d+)/;
require DS::TypeSpec;
# Important: calling this method with no typespec will make
# the class read a header from the stream and use it
# to create a typespec.
sub new {
my( $class, $fh, $target, $typespec, $field_order, $row ) = @_;
assert($fh->isa('IO::Handle'));
my @fields;
if( not defined( $typespec ) ) {
# Get header from stream to create type spec from
assert( (not defined $field_order),
"Can't handle specifying field order if fields derived from stream header." );
# Get first non-comment line in stream
my $line = getline($fh);
$line =~ s/[\n\r]+$//;
$line =~ s/(\s+|$)/\n/g;
my @fields = split /\s+/, $line;
$typespec = new DS::TypeSpec( [ @fields ] );
$field_order = [@fields];
}
my $self = $class->SUPER::new( $typespec, $target, $row );
$self->{field_order} = $field_order;
$self->{fh} = $fh;
return $self;
}
sub _fetch {
my($self) = @_;
my $result = undef;
unless($self->{fh}->eof()) {
my ($line) = getline($self->{fh}) =~ /^([^\n\r]+)/;
my (@line) = split /\t/, $line;
# Replace values that are not defined (because the field was empty or not there)
# with empty strings.
# This is a design decision: we could check whether each line has exactly the right
# number of fields. The problem is that Microsoft Excel truncates trailing empty fields
# on each line, which would trigger spurious errors.
for(my $i = 0; $i <= $#{$self->{field_order}}; $i++) {
$line[$i] = '' unless defined( $line[$i] );
}
@{$self->{row}}{@{$self->{field_order}}} = @line;
$result = 1;
}
return $result ? $self->{row} : undef;
}
# Functions (not methods!)
sub getline {
my( $fh ) = @_;
my $line;
do {
$line = $fh->getline();
} while( $line =~ /^#/ and not $fh->eof());
return $line;
}
1;