/usr/local/CPAN/Plagger/Plagger/Text.pm


package Plagger::Text;
use strict;
use base qw( Class::Accessor::Fast );
# Generate the `type` and `data` accessor methods that the rest of this
# class calls (`type` is compared against 'html' / 'text' by is_html and
# is_text; `data` holds the wrapped string).
__PACKAGE__->mk_accessors(qw( type data ));

# Stringification yields the `data` field, so an object can be interpolated
# like a plain string. `fallback => 1` allows Perl to autogenerate the
# operators that are not overloaded explicitly (see the overload docs).
use overload q("") => sub { $_[0]->data }, fallback => 1;

use HTML::Tagset;
use Plagger::Util;

# Generic constructor.
#
# Arguments:
#   $class      - class name to bless the new object into
#   key => value pairs - stored verbatim as the object's fields
#                        (the accessors in this class read `type` and `data`)
#
# Returns the new blessed hash reference.
sub new {
    my $class  = shift;
    my %fields = @_;

    return bless \%fields, $class;
}

# Build an object from a raw string, guessing whether it is HTML or plain
# text.
#
# Arguments:
#   $class - class name to bless the new object into
#   $text  - string to wrap; the local copy is decoded from UTF-8 when it is
#            not already flagged as a character string
#
# Returns a new object whose `type` is 'html' or 'text' and whose `data` is
# the (decoded) string. Returns nothing (undef in scalar context, the empty
# list in list context) when $text is undefined.
#
# Heuristic:
#   * collect simple attribute-less tags such as <p>, <br> or <br />;
#   * if more than half of them are not in %HTML::Tagset::isKnown, the
#     string is treated as text (e.g. "<foo>" used as a placeholder);
#   * otherwise any collected tag, or one of the entities &amp; &gt; &lt;
#     &quot;, marks the string as HTML;
#   * everything else is text.
sub new_from_text {
    my($class, $text) = @_;

    return unless defined $text;
    utf8::decode($text) unless utf8::is_utf8($text);

    my @tags = $text =~ m!<(\w+)\s?/?>!g;

    # HTML tag names are case-insensitive, but %HTML::Tagset::isKnown is
    # keyed by lower-case names. Fold case before the lookup; otherwise
    # upper-case markup such as <BR> or <P> is counted as unknown and the
    # whole string is misclassified as plain text.
    my @unknown = grep { !$HTML::Tagset::isKnown{ lc $_ } } @tags;

    my $type;
    if (@unknown > @tags / 2) {
        $type = 'text';
    } elsif (@tags || $text =~ m!&(?:amp|gt|lt|quot);!) {
        $type = 'html';
    } else {
        $type = 'text';
    }

    return bless { type => $type, data => $text }, $class;
}

# True when this object's `type` is the string 'html'.
sub is_html {
    my ($self) = @_;
    return $self->type eq 'html';
}

# True when this object's `type` is the string 'text'.
sub is_text {
    my ($self) = @_;
    return $self->type eq 'text';
}

# Return the content in a form suitable for HTML output.
#
# HTML content is returned unchanged; anything else is passed through
# Plagger::Util::encode_xml first.
sub html {
    my ($self) = @_;

    return $self->data if $self->is_html;

    return Plagger::Util::encode_xml($self->data);
}

# Return the content as plain text.
#
# HTML content is passed through Plagger::Util::strip_html; text content is
# returned unchanged.
sub plaintext {
    my ($self) = @_;

    return $self->is_html
        ? Plagger::Util::strip_html($self->data)
        : $self->data;
}

# Return the `data` field as-is.
sub unicode {
    my $self = shift;
    return $self->data;
}
# Return the `data` field encoded as a UTF-8 byte string.
sub utf8 {
    my $self = shift;

    # This file never loads Encode itself; load it on demand so the method
    # does not depend on some other module having pulled it in first.
    require Encode;

    return Encode::encode_utf8($self->data);
}
# Return the `data` field encoded as bytes in the given encoding.
#
# Arguments:
#   $encoding - encoding name, passed straight through to Encode::encode
sub encode {
    my ($self, $encoding) = @_;

    # This file never loads Encode itself; load it on demand so the method
    # does not depend on some other module having pulled it in first.
    require Encode;

    return Encode::encode($encoding, $self->data);
}

# Serialized form of the object: simply the `data` field.
sub serialize {
    my ($self) = @_;
    return $self->data;
}

1;