HTML::Element::Convert - Monkeypatch content conversion methods into HTML::Element


HTML-Element-Convert documentation Contained in the HTML-Element-Convert distribution.

Index


Code Index:

NAME

Top

HTML::Element::Convert - Monkeypatch content conversion methods into HTML::Element

VERSION

Top

Version 0.10

SYNOPSIS

Top

  use HTML::TreeBuilder;
  use HTML::Element::Convert;

  my $tree = HTML::TreeBuilder->new_from_content($html);

  # Search for some JSON-encoded meta-data embedded in the document and extract it:
  my $element = $tree->look_down(...);
  my $hash = $element->extract_content;

  # This time extract the YAML data and delete the containing element from the tree:
  $element = $tree->look_down(...);
  $hash = $element->pull_content;

  # Convert the content of any <div> with a 'lang="markdown"' attribute into HTML:
  $tree->convert_content;

$element->convert_content

Look for every div below $element containing a lang attribute. If it recognizes lang, it will convert and replace the div's content.

Currently, only markdown is supported.

$content = $element->extract_content([TYPE])

Extract and parse the content of $element. If TYPE is given, then this method will assume the content is of the given type, and try to parse it accordingly. Otherwise it will use the lang attribute of $element to determine the type.

$content = $element->pull_content([TYPE])

Like extract_content, extract and parse the content of $element. It will also delete $element from the tree.

AUTHOR

Top

Robert Krimen, <rkrimen at cpan.org>

BUGS

Top

Please report any bugs or feature requests to bug-html-element-convert at rt.cpan.org, or through the web interface at http://rt.cpan.org/NoAuth/ReportBug.html?Queue=HTML-Element-Convert. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.

SUPPORT

Top

You can find documentation for this module with the perldoc command.

    perldoc HTML::Element::Convert

You can also look for information at:

* AnnoCPAN: Annotated CPAN documentation

http://annocpan.org/dist/HTML-Element-Convert

* CPAN Ratings

http://cpanratings.perl.org/d/HTML-Element-Convert

* RT: CPAN's request tracker

http://rt.cpan.org/NoAuth/Bugs.html?Dist=HTML-Element-Convert

* Search CPAN

http://search.cpan.org/dist/HTML-Element-Convert

ACKNOWLEDGEMENTS

Top

COPYRIGHT & LICENSE

Top


HTML-Element-Convert documentation Contained in the HTML-Element-Convert distribution.
package HTML::Element::Convert;

use warnings;
use strict;

our $VERSION = '0.10';

use Carp;

# %PARSE_FUNC maps a content type name to the code ref that parses text of that
# type; %EXTRACT_FUNC maps a type name to the code ref that extracts content of
# that type from an element.
our %PARSE_FUNC;
our %EXTRACT_FUNC;
BEGIN {
	# Every parser is optional: a type whose backing module cannot be loaded
	# simply gets no entry in %PARSE_FUNC.
	if (eval { require Text::Markdown; 1 }) {
		$PARSE_FUNC{markdown} = \&Text::Markdown::markdown;
	}

	if (eval { require JSON; 1 }) {
		# Prefer from_json when the installed JSON provides it; jsonToObj is
		# the JSON 1.x name and is kept as the fallback for old installations.
		$PARSE_FUNC{JSON} = JSON->can('from_json') || \&JSON::jsonToObj;
	}

	# YAML::Syck is tried first; plain YAML is only consulted when it is missing.
	# NOTE(review): depending on module version and configuration, YAML loaders
	# may instantiate blessed objects -- confirm before feeding untrusted documents.
	if (eval { require YAML::Syck; 1 }) {
		$PARSE_FUNC{YAML} = \&YAML::Syck::Load;
	}
	elsif (eval { require YAML; 1 }) {
		$PARSE_FUNC{YAML} = \&YAML::Load;
	}
}


# Return the text content of an element, as reported by its as_text method.
sub _as_text($) {
	my ($element) = @_;
	return $element->as_text;
}
# Return the children of an element concatenated into one string.
#
# Plain (non-reference) children are copied through unchanged. Each child that
# is a reference is serialised with as_XML, has a trailing newline removed, and
# has every double quote replaced by a single quote.
sub _as_raw_HTML($) {
	my ($element) = @_;

	my @pieces;
	for my $child ($element->content_list) {
		if (ref $child) {
			my $xml = $child->as_XML;
			chomp $xml;
			$xml =~ s/"/'/g;
			push @pieces, $xml;
		}
		else {
			push @pieces, $child;
		}
	}

	return join '', @pieces;
}

# Extractor for plain text: hand back the element's text content unparsed.
sub _extract_text {
	my ($element) = @_;
	return _as_text($element);
}
$EXTRACT_FUNC{text} = \&_extract_text;

# Extractor for YAML: serialise the element's children with _as_raw_HTML and
# hand the result to the registered YAML parser.
#
# Returns whatever the parser returns. Croaks when no YAML parser was loaded.
sub _extract_YAML {
	my ($element) = @_;

	my $load = $PARSE_FUNC{YAML}
		or croak "Unable to extract YAML content: neither YAML::Syck nor YAML could be loaded";

	# A newline is appended to the serialised content before parsing
	# (presumably the loaders expect newline-terminated input -- TODO confirm).
	return $load->(_as_raw_HTML($element) . "\n");
}
$EXTRACT_FUNC{YAML} = \&_extract_YAML;

# Extractor for JSON: serialise the element's children with _as_raw_HTML and
# hand the result to the registered JSON parser.
#
# Returns whatever the parser returns. Croaks when no JSON parser was loaded.
sub _extract_JSON {
	my ($element) = @_;

	my $decode = $PARSE_FUNC{JSON}
		or croak "Unable to extract JSON content: the JSON module could not be loaded";

	return $decode->(_as_raw_HTML($element));
}
$EXTRACT_FUNC{JSON} = \&_extract_JSON;

# Extractor for markdown: convert the element's text content with the
# registered markdown converter and parse the result into a new
# HTML::TreeBuilder tree, which is returned.
#
# Croaks when no markdown converter was loaded.
sub _extract_markdown {
	my ($element) = @_;

	my $to_html = $PARSE_FUNC{markdown}
		or croak "Unable to extract markdown content: Text::Markdown could not be loaded";

	# Loaded on demand: nothing else in this file loads HTML::TreeBuilder, so
	# without this the call below only works if the caller loaded it already.
	require HTML::TreeBuilder;

	return HTML::TreeBuilder->new_from_content($to_html->(_as_text($element)));
}
$EXTRACT_FUNC{markdown} = \&_extract_markdown;

# Dispatch $element to the extractor registered for $type.
#
# $type is matched case-insensitively against the supported names: "text" and
# "plain" (synonyms), "JSON", "YAML" and "markdown". Returns whatever the
# extractor returns, or nothing (undef in scalar context) when $type is missing
# or names no supported type.
sub _extract {
	my ($element, $type) = @_;
	return unless defined $type;

	# Maps each accepted (lower-cased) type name to its key in %EXTRACT_FUNC.
	# "plain" has no extractor of its own; it shares the one registered as
	# "text", so both spellings must resolve to the "text" key.
	my %extractor_key_for = (
		text     => 'text',
		plain    => 'text',
		json     => 'JSON',
		yaml     => 'YAML',
		markdown => 'markdown',
	);

	my $key       = $extractor_key_for{ lc $type } or return;
	my $extractor = $EXTRACT_FUNC{$key}            or return;
	return $extractor->($element);
}

package HTML::Element;

use Carp;
use UNIVERSAL;

# Extract and parse the content of this element.
#
# The content type is the optional first argument when it is true, otherwise
# the element's "lang" attribute, otherwise "text". Returns, as a single
# scalar, whatever HTML::Element::Convert::_extract yields for that type
# (undef when it yields nothing).
sub extract_content {
	my ($self, $type) = @_;
	$type ||= $self->attr("lang") || "text";

	# Forced to scalar so callers always receive exactly one value.
	return scalar HTML::Element::Convert::_extract($self, $type);
}

# Convert the content of every <div> found by look_down from $self whose "lang"
# attribute matches a supported markup language (currently only markdown).
#
# When the matching div is $self, its content is replaced by the converted
# element's children and its "lang" attribute is removed. Any other matching
# div is replaced in the tree by the converted element and then deleted.
# Returns nothing.
sub convert_content {
	my $self = shift;

	# Compare elements by address so the identity test does not depend on how
	# the objects stringify.
	require Scalar::Util;
	my $self_addr = Scalar::Util::refaddr($self);

	for my $lang (qw(markdown)) {
		my @elements = $self->look_down("_tag", "div", "lang", qr/$lang/i);
		for my $element (@elements) {
			my $content = $element->extract_content;

			# Only a parsed tree (something offering "guts") can be spliced in.
			next unless UNIVERSAL::can($content, "guts");

			# NOTE(review): guts is assumed to yield undef in scalar context when
			# the converted markup is empty -- confirm against HTML::TreeBuilder.
			my $new_element = $content->guts;

			if (Scalar::Util::refaddr($element) == $self_addr) {
				$self->delete_content;
				$self->push_content($new_element->content_list) if defined $new_element;
				$self->attr("lang", undef);
				$new_element->delete if defined $new_element;
			}
			elsif (defined $new_element) {
				$element->replace_with($new_element)->delete;
			}
			else {
				# Nothing to put in its place: just remove the div.
				$element->delete;
			}
		}
	}

	return;
}

# Extract this element's content exactly as extract_content does (the same
# optional arguments are passed through), then delete the element.
# Returns the extracted content.
sub pull_content {
	my ($self, @args) = @_;
	my $extracted = $self->extract_content(@args);
	$self->delete;
	return $extracted;
}

# TODO Check to see if we're using HTML::TreeBuilder::Select first...

# Alpha function
#
# Run $query through the element's "select" method with the 'extract-content'
# action and return its result. Returns nothing when the element has no
# "select" method. Croaks when "select" is available but no query is given.
# NOTE(review): "select" is presumably supplied by HTML::TreeBuilder::Select
# (see the TODO in this file) -- confirm.
sub _extract_child_content {
	my ($self, $query) = @_;
	return unless UNIVERSAL::can($self, "select");

	croak "Need a query (a CSS selector or XPath)" unless $query;
	return $self->select($query => 'extract-content');
}

# Alpha function
#
# Run $query through the element's "select" method with the 'pull-content'
# action and return its result. Returns nothing when the element has no
# "select" method. Croaks when "select" is available but no query is given.
# NOTE(review): "select" is presumably supplied by HTML::TreeBuilder::Select
# (see the TODO in this file) -- confirm.
sub _pull_child_content {
	my ($self, $query) = @_;
	return unless UNIVERSAL::can($self, "select");

	croak "Need a query (a CSS selector or XPath)" unless $query;
	return $self->select($query => 'pull-content');
}


1; # End of HTML::Element::Convert