/usr/local/CPAN/wikitext-perl/Text/WikiText/Output.pm


# WikiText parser modules, Copyright (C) 2006-7 Enno Cramer, Mikhael Goikhman
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the Perl Artistic License or the GNU General
# Public License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

package Text::WikiText::Output;

use strict;
use warnings;

use Text::WikiText ':types';

sub new ($) {
	my $class = shift;

	my $self = {};
	bless $self, $class;

	my %entities = $self->entities;
	$self->{entities} = \%entities;
	$self->{entity_re} = join '|', map { quotemeta } keys %entities;

	return $self;
}

sub escape ($$) {
	my $self = shift;
	my $text = shift;

	$text =~ s/$self->{entity_re}/$self->{entities}{$&}/ego;

	return $text;
}

sub separator ($) {
	return "\n";
}

my $RE_TLD = qr/
		com|edu|gov|int|mil|net|org
		|aero|biz|coop|info|museum|name|pro
		|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|az|ax
		|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz
		|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz
		|de|dj|dk|dm|do|dz
		|ec|ee|eg|eh|er|es|et|eu
		|fi|fj|fk|fm|fo|fr
		|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy
		|hk|hm|hn|hr|ht|hu
		|id|ie|il|im|in|io|iq|ir|is|it
		|je|jm|jo|jp
		|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz
		|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly
		|ma|mc|md|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz
		|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz
		|om
		|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py
		|qa
		|re|ro|ru|rw
		|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|sv|sy|sz
		|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz
		|ua|ug|uk|um|us|uy|uz
		|va|vc|ve|vg|vi|vn|vu
		|wf|ws
		|ye|yt|yu
		|za|zm|zw
/x;

sub fill_in_link {
	my ($self, $chunk) = @_;

	if ($chunk->{style} eq '') {
		# bitmap files
		if ($chunk->{target} =~ /\.(png|jpg|jpeg|gif|eps)$/) {
			$chunk->{style} = '=';

		# network protocols
		} elsif ($chunk->{target} =~ /^(http|ftp|news|mailto|irc):/) {
			$chunk->{style} = '>';

		# common top level domains
		} elsif ($chunk->{target} =~ /^(\w+\.){1,}$RE_TLD(\/|$)/) {
			$chunk->{style} = '>';

		# whitespace in urls is bad
		} elsif ($chunk->{target} =~ /\s/) {
			$chunk->{style} = '#';

		# fallback
		} else {
			$chunk->{style} = '>';
		}
	}

	$chunk->{label} ||= $chunk->{target};

	# outside link, without protocol and no directory identifier
	if ($chunk->{style} eq '>'
		&& $chunk->{target} !~ /^\w+:/
		&& $chunk->{target} !~ m,^(/|\.),
	) {
		if ($chunk->{target} =~ /@/) {
			$chunk->{target} = "mailto:" . $chunk->{target};

		} elsif ($chunk->{target} =~ /^www\./) {
			$chunk->{target} = "http://" . $chunk->{target};

		} elsif ($chunk->{target} =~ /^ftp\./) {
			$chunk->{target} = "ftp://" . $chunk->{target};

		} elsif ($chunk->{target} =~ /^(\w+\.){1,}$RE_TLD(\/|$)/) {
			$chunk->{target} = "http://" . $chunk->{target};
		}

		if ($chunk->{target} =~ /\.$RE_TLD$/) {
			$chunk->{target} .= '/';
		}
	}
}

sub dump_verbatim {
	my ($self, $verb, %opts) = @_;

	return $verb->{text};
}

# This helper method creates nice WikiText table, good for text based formats
sub dump_ascii_formatted_table {
	my ($self, $table, %opts) = @_;

	my @cell_texts = ();
	my @col_widths = ();
	my @col_aligns = ();
	my $is_compact = $opts{compact_tables};

	foreach my $row (@{$table->{content}}) {
		for (my $i = 0; $i < @{$row->{cols}}; $i++) {
			my $col = $row->{cols}[$i];
			my $text = $self->dump_text($col->{text}, %opts);
			$col_aligns[$i] = (!defined $col_aligns[$i]
				|| $col_aligns[$i]) && $text =~ /^[\d.]+$/;
			$text = " " if $text eq "" && !$is_compact;
			my $old_len = $col_widths[$i] || 0;
			my $new_len = length($text);
			push @cell_texts, $text;
			$col_widths[$i] = $new_len if $new_len > $old_len;
		}
	}

	my $separator_row = "+"
		. join("+", map { "-" x ($is_compact ? $_ : $_ + 2) } @col_widths)
		. "+\n";

	my $str = $separator_row;

	foreach my $row (@{$table->{content}}) {
		$str .= "|";

		for (my $i = 0; $i < @{$row->{cols}}; $i++) {
			### TODO: add support for $col->{span}
			my $col = $row->{cols}[$i];
			my $sign = $col_aligns[$i] ? "" : "-";
			$str .= "|" if $i;
			$str .= " " unless $is_compact;
			$str .= sprintf("%$sign$col_widths[$i]s", shift(@cell_texts));
			$str .= " " unless $is_compact;
		}

		$str .= "|\n";
		$str .= $separator_row if $row->{heading};
	}

	$str .= $separator_row;

	return $str;
}

# This helper method adds intentation block, good for text based formats
sub add_indentation_block {
	my ($self, $text, %opts) = @_;

	my $num_spaces = $opts{indent_spaces} || 2;

	join("", map { " " x $num_spaces . $_ . "\n" } split(/\n/, $text));
}

sub dump_list {
	my ($self, $list, %opts) = @_;

	my $str = '';

	my $first = 1;
	foreach my $sect (@$list) {
		$str .= $self->separator unless $first;
		$first = 0;

		if ($sect->{type} eq SECTION) {
			$str .= $self->dump_section($sect, %opts);

		} elsif ($sect->{type} eq DESCRIPTION) {
			$str .= $self->dump_description($sect, %opts);

		} elsif ($sect->{type} eq ENUMERATION) {
			$str .= $self->dump_enumeration($sect, %opts);

		} elsif ($sect->{type} eq LISTING) {
			$str .= $self->dump_listing($sect, %opts);

		} elsif ($sect->{type} eq QUOTE) {
			$str .= $self->dump_quotation($sect, %opts);

		} elsif ($sect->{type} eq TABLE) {
			$str .= $self->dump_table($sect, %opts);

		} elsif ($sect->{type} eq RULE) {
			$str .= $self->dump_rule($sect, %opts);

		} elsif ($sect->{type} eq VERBATIM) {
			$str .= $self->dump_verbatim($sect, %opts)
				unless $opts{no_verbatim};

		} elsif ($sect->{type} eq PRE) {
			$str .= $self->dump_preformatted($sect, %opts);

		} elsif ($sect->{type} eq CODE) {
			$str .= $self->dump_code($sect, %opts);

		} elsif ($sect->{type} eq P) {
			$str .= $self->dump_paragraph($sect, %opts);

		} elsif ($sect->{type} eq COMMENT) {
			# nada

		} else {
			warn(
				"Unrecognized block type '"
				. $sect->{type} . "' defined on line "
				. $sect->{line} . ".\n"
			);
		}
	}

	return $str;
}

sub dump {
	my ($self, $list, %opts) = @_;

	my $page = $self->dump_list($list, %opts);

	if ($opts{full_page}) {
		$opts{escaped_title} = $self->escape($opts{title} || 'No Title');
		$opts{escaped_author} = $self->escape($opts{author} || 'Unknown');

		$page = $self->construct_full_page($page, %opts);
	}

	return $page;
}

1;

__END__