HTML::SiteTear - Make a separated copy of a part of the site


HTML-SiteTear documentation Contained in the HTML-SiteTear distribution.

Index


Code Index:

NAME

Top

HTML::SiteTear - Make a separated copy of a part of the site

VERSION

Top

Version 1.44

SYMPOSIS

Top

 use HTML::SiteTear;

 $p = HTML::SiteTear->new("/dev1/website/index.html");
 $p->copy_to("/dev1/website2/newindex.html");

DESCRIPTION

Top

This module is to make a separated copy of a part of web site in local file system. All linked files (HTML file, image file, javascript, cascading style shieet) from a source HTML file will be copied under a new page.

This module is useful to make a destributable copy of a part of a web site.

METHODS

Top

new

    $p = HTML::SiteTear->new($source_path);

    $p = HTML::SiteTear->new('source_path' => $source_path,
                             'site_root_path' => $root_path,
                             'site_root_url' => $url);

    $p = HTML::SiteTear->new('source_path' => $source_dir,
                             'member_files' => \@pathes);

Make an instance of this module. The path to source HTML file "$source_path" is required as an arguemnt. See ABSOLUTE LINK about 'site_root_path' and 'site_root_url' parameters

copy_to

    $p->copy_to($destination_path);

Copy $source_path into $destination_path. All linked file in $source_path will be copied into directories under $destination_path

ABSOLUTE LINK

Top

AUTHOR

Top

Tetsuro KURITA <tkurita@mac.com>


HTML-SiteTear documentation Contained in the HTML-SiteTear distribution.
package HTML::SiteTear;

use 5.008;
use strict;
use warnings;
use File::Basename;
use File::Spec;
use File::Path;
use File::Find;
use Cwd;
use Carp;
use base qw(Class::Accessor);
__PACKAGE__->mk_accessors( qw(source_path
                             site_root_path
                             site_root_url
                             target_path
                             member_files) );

use HTML::SiteTear::Root;
use HTML::SiteTear::Page;

use Data::Dumper;

our $VERSION = '1.44';

our @DEFAULT_HTML_SUFFIXES = qw(.html .htm .xhtml);

sub new {
    my $class = shift @_;
    my $self;
    if (@_ == 1) {
        $self = bless {'source_path' => shift @_}, $class;

    } else {
        my %args = @_;
        $self = $class->SUPER::new(\%args);
    }
    
    $self->source_path or croak "source_path is not specified.\n";
    (-e $self->source_path) or croak $self->source_path." is not found.\n";
        
    if (-d $self->source_path) {
        unless (File::Spec->file_name_is_absolute($self->source_path)) {
            my $cwd = fix_dir_path(cwd);
            $self->source_path(
                URI::file->new($self->source_path)->abs($cwd)->file);
        }
        
        unless ($self->member_files) {
            my @htmlfiles;
            my $wanted = sub {
                my $name = $_; 
                if (grep {$name =~ /\Q$_\E$/} @DEFAULT_HTML_SUFFIXES) {
                    push @htmlfiles, $File::Find::name;
                }
            };
            find($wanted, $self->source_path);
            if (@htmlfiles) {
                $self->member_files(\@htmlfiles);
            } else {
                croak "Can't find HTML files under $self->source_path.\n";
            }
        }
        $self->source_path(fix_dir_path($self->source_path));
        
    } else {
        if ($self->member_files) {
            croak $self->source_path.
                    " is not a directory. Must be a directory.\n";
        }    
    }
        
    return $self;
}

sub page_filter {
    my ($class, $module) = @_;
    return HTML::SiteTear::Page->page_filter($module);
}

sub copy_to {
    #print "start copy_to in SiteTear.pm\n";
    my ($self, $destination_path) = @_;
    my $source_path = $self->source_path;
    if ($self->member_files) {
        return $self->copy_to_dir($destination_path);
    }

    if (-e $destination_path){
        if (-d $destination_path) {
            $destination_path = File::Spec->catfile($destination_path,
                                               basename($source_path));
        }
    } else {
        my ($name, $dir) = fileparse($destination_path);
        mkpath($dir);
        unless ($name) {
            $destination_path = File::Spec->catfile($dir,
                                                basename($source_path));
        }
    }
    
    $self->target_path($destination_path);
    my $root = HTML::SiteTear::Root->new(%$self);
    my $new_source_page = HTML::SiteTear::Page->new(
                                        'parent' => $root,
                                        'source_path' => $source_path);
    $new_source_page->linkpath(basename($destination_path) );
    #$new_source_page->link_uri(URI::file->new(Cwd::abs_path($destination_path)));
    $new_source_page->link_uri(URI::file->new_abs($destination_path));
    $new_source_page->copy_to_linkpath;
    return $new_source_page;
}

sub fix_dir_path {
    my ($path) = @_;
    return File::Spec->catfile($path, File::Spec->curdir);
}

sub copy_to_dir {
    my ($self, $destination_path) = @_;
    if (-e $destination_path){
        unless (-d $destination_path) {
            croak $destination_path."is not directory.\n";
        }
    }

    $destination_path = fix_dir_path($destination_path);
    
    $self->target_path($destination_path);
    my $root = HTML::SiteTear::Root->new(%$self);
    my $source_root_uri = $root->source_root_uri;
    my $dest_uri = URI::file->new($destination_path);
    my @results;
    foreach my $file (@{$self->member_files}) {
        my $a_member_file = $file;
        unless (File::Spec->file_name_is_absolute($a_member_file)) {
            $a_member_file = URI::file->new($a_member_file)
                                   ->abs($self->source_path)->file;
        }
        my $page = HTML::SiteTear::Page->new(
                                            'parent' => $root,
                                            'source_path' => $a_member_file);
        my $rel_from_source_root = $page->source_uri->rel($source_root_uri);
        my $abs_from_dest = $rel_from_source_root->abs($dest_uri);
        $page->link_uri($abs_from_dest);
        $page->copy_to_linkpath;
        push @results, $page;
    }
    return \@results;
}

1;