| XML-SAXDriver-Excel documentation | Contained in the XML-SAXDriver-Excel distribution. |
XML::SAXDriver::Excel - SAXDriver for converting Excel files to XML
use XML::SAXDriver::Excel;
my $driver = XML::SAXDriver::Excel->new(%attr);
$driver->parse(%attr);
XML::SAXDriver::Excel was developed as a complement to XML::Excel, though it provides a SAX interface, for gained performance and efficiency, to Excel files. Specific object attributes and handlers are set to define the behavior of the parse() method. It does not matter where you define your attributes. If they are defined in the new() method, they will apply to all parse() calls. You can override in any call to parse() and it will remain local to that function call and not effect the rest of the object.
Source - (Reference to a String, ByteStream, SystemId)
String - **currently not supported** Contains literal Excel data.
Ex (Source => {String => $foo})
ByteStream - **currently not supported** Contains a filehandle reference.
Ex. (Source => {ByteStream => \*STDIN})
SystemId - Contains the path to the file containing
the Excel data. Ex (Source => {SystemId => '../excel/foo.xls'})
Handler - Contains the object to be used as a XML print handler
DTDHandler - Contains the object to be used as a XML DTD handler.
****There is no DTD support available at this time.
I'll make it available in the next version.****
SubChar - Specifies the character(s) to use to substitute illegal chars in xml tag names, that
will be generated from the first row, but setting the Dynamic_Col_Headings.
Col_Headings - Reference to the array of column names to be used for XML tag names.
Dynamic_Col_Headings - Should be set if you want the XML tag names generated dynamically
from the first row in Excel file. **Make sure that the number of columns
in your first row is equal to the largest row in the document. You
don't generally have to worry about if you are submitting valid Excel
data, where each row will have the same number of columns, even if
they are empty.
Headings_Handler - Should be used along with Dynamic_Col_Headings to provide a heading
normalization handler, to conform the headings to the XML 1.0
specifications. If not provided, a default will be used that only
works with ASCII chars, therefore any other character sets need to
provide a custom handler! The handler sub will be passed the heading
string as the first argument.
Ilya Sterin (isterin@cpan.org)
XML::Excel Spreadsheet::ParseExcel
| XML-SAXDriver-Excel documentation | Contained in the XML-SAXDriver-Excel distribution. |
package XML::SAXDriver::Excel; use strict; use Spreadsheet::ParseExcel; use base qw(XML::SAX::Base); use vars qw($VERSION $NS_SAXDriver_Excel); $VERSION = '0.06'; $NS_SAXDriver_Excel = 'http://xmlns.perl.org/sax/XML::SAXDriver::Excel'; sub _parse_bytestream { my $self = shift; my $stream = shift; die ("Cannot use String to parse a binary Excel file. You can only use a file by setting SystemId"); $self->_parse_Excel(\$stream); } sub _parse_string { my $self = shift; my $stream = shift; ### This is experimental due to the binary streams #require IO::String; #$io = IO::String->new($var); #$self->_parse_Excel($io); die ("Cannot use String to parse a binary Excel file. You can only use a file by setting SystemId"); #my @strings = split("\n", $self->{ParseOptions}->{Source}{String}); #$self->_parse_Excel(\@strings); } sub _parse_systemid { my $self = shift; my $path = shift; $self->_init(); $self->{ParseOptions}->{Parser}->Parse($path); $self->_end(); } sub _init { my $self = shift; ### Reset vars before parsing $self->{_row} = []; ## Used to push row values per row $self->{_row_num} = -1; ## Set at -1 since rows are counted from 0 $self->{_last_row_num} = 0; ## Used to save the last row value received $self->{_last_col} = 0; $self->{ParseOptions}->{Parser} ||= Spreadsheet::ParseExcel->new(CellHandler => \&cb_routine, Object => $self, NotSetCell => 1); $self->{ParseOptions}->{Headings_Handler} ||= \&_normalize_heading; $self->{_document} = {}; $self->{ParseOptions}->{Handler}->start_document($self->{_document}); $self->xml_decl($self->{ParseOptions}->{Declaration}); my $pm_excel = $self->_create_node( Prefix => 'SAXDriver::Excel', NamespaceURI => $NS_SAXDriver_Excel, ); $self->start_prefix_mapping($pm_excel); $self->end_prefix_mapping($pm_excel); $self->{_doc_element} = { Name => $self->{ParseOptions}->{File_Tag} || "records", Attributes => {}, }; $self->{ParseOptions}->{Handler}->start_element($self->{_doc_element}); } ## Parse file or string sub _end { my $self = shift; _print_xml_finish($self); ### Reset vars after parsing $self->{_row} = []; ## Used to push row values per row $self->{_row_num} = -1; ## Set at -1 since rows are counted from 0 $self->{_last_row_num} = 0; ## Used to save the last row value received $self->{ParseOptions}->{Handler}->end_element($self->{_doc_element}); return $self->{ParseOptions}->{Handler}->end_document($self->{_document}); } sub cb_routine($$$$$$) { my ($self, $oBook, $iSheet, $iRow, $iCol, $oCell) = @_; my $oWkS = $oBook->{Worksheet}[$iSheet]; $self->{ParseOptions}->{Col_Headings} ||= []; if ($iCol < $oWkS->{MaxCol}) { if ($self->{_last_col} > $iCol) { while ($self->{_last_col} < $oWkS->{MaxCol}) { push(@{$self->{_row}}, undef); $self->{_last_col}++; } _print_xml(@_); } if ($self->{_last_col} < $iCol) { while ($self->{_last_col} < $iCol) { push(@{$self->{_row}}, undef); $self->{_last_col}++; } } push(@{$self->{_row}}, $oCell->Value()); $self->{_last_row_num} = $iRow; $self->{_last_col}++; return; } push(@{$self->{_row}}, $oCell->Value());# if $flag == 0; _print_xml(@_); return; } sub _normalize_heading ### Default if no Headings_Handler is provided { my $heading= shift; my $sub_char = shift || '_'; $heading =~ s/^\s//g; $heading =~ s/\s$//g; $heading =~ s/^([^a-zA-Z|^_|^:])/$sub_char/g; ### We used to also replace the xml in the beginning, but I took it of per recommendation of Michael Rodriguez. $heading =~ s/[^a-zA-Z|^-|^.|^0-9|^:]/$sub_char/g; return $heading; } sub _print_xml { my ($self, $oBook, $iSheet, $iRow, $iCol, $oCell) = @_; ### Remember self is passed through the Spreadsheet::ParseExcel object my $oWkS = $oBook->{Worksheet}[$iSheet]; $self->{_last_row_num} = $iRow; $self->{_last_col} = 0; my $temp_row = $oCell->Value(); $self->{_row_num} = $self->{_last_row_num}; if (!@{$self->{ParseOptions}->{Col_Headings}} && !$self->{ParseOptions}->{Dynamic_Col_Headings}) { my $i = 1; @{$self->{ParseOptions}->{Col_Headings}} = map { "column" . $i++ } @{$self->{_row}}; } elsif (!@{$self->{ParseOptions}->{Col_Headings}} && $self->{ParseOptions}->{Dynamic_Col_Headings}) { @{$self->{ParseOptions}->{Col_Headings}} = map { $self->{ParseOptions}->{Headings_Handler}->($_, $self->{ParseOptions}->{SubChar}); } @{$self->{_row}}; $self->{_row} = []; ### Clear the @$row array return; ### So that it does not print the column headings as the content of the first node. } my $el = { Name => $self->{ParseOptions}->{Parent_Tag} || "record", Attributes => {}, }; $self->{ParseOptions}->{Handler}->start_element($el); for (my $i = 0; $i <= $#{$self->{ParseOptions}->{Col_Headings}}; $i++) { my $column = { Name => $self->{ParseOptions}->{Col_Headings}->[$i], Attributes => {} }; $self->{ParseOptions}->{Handler}->start_element($column); $self->{ParseOptions}->{Handler}->characters({Data => $self->{_row}->[$i]}); $self->{ParseOptions}->{Handler}->end_element($column); } $self->{ParseOptions}->{Handler}->end_element($el); $self->{_row} = []; ### Clear $row and start the new row processing } sub _print_xml_finish { my $self = shift; while (@{$self->{_row}} < 9) { push(@{$self->{_row}}, undef); } my $el = { Name => $self->{ParseOptions}->{Parent_Tag} || "record", Attributes => {}, }; $self->{ParseOptions}->{Handler}->start_element($el); for (my $i = 0; $i <= $#{$self->{ParseOptions}->{Col_Headings}}; $i++) { my $column = { Name => $self->{ParseOptions}->{Col_Headings}->[$i], Attributes => {} }; $self->{ParseOptions}->{Handler}->start_element($column); $self->{ParseOptions}->{Handler}->characters({Data => $self->{_row}->[$i]}); $self->{ParseOptions}->{Handler}->end_element($column); } $self->{ParseOptions}->{Handler}->end_element($el); } sub _create_node { shift; # this may check for a factory later return {@_}; } 1; __END__