Pod::WikiDoc
package Pod::WikiDoc;
use strict;
use warnings;
use vars qw($VERSION );
$VERSION = '0.18';
use 5.006;
use Carp;
use IO::String;
use Scalar::Util qw( blessed );
use Pod::WikiDoc::Parser;
#--------------------------------------------------------------------------#
# PREAMBLE DOCUMENTATION
#--------------------------------------------------------------------------#
#--------------------------------------------------------------------------#
# PUBLIC METHODS
#--------------------------------------------------------------------------#
### == {new}
###
### $parser = Pod::WikiDoc->new( \%args );
###
### Constructor for a new Pod::WikiDoc object. It takes a single, optional
### argument: a hash reference with the following optional keys:
###
### * {comment_blocks}: if true, Pod::WikiDoc will scan for wikidoc in comment
### blocks. Default is false.
### * {comment_prefix_length}: the number of leading sharp (#) symbols to
### denote a comment block. Default is 3.
### * {keywords}: a hash reference with keywords and values for keyword
### substitution
my %default_args = (
comment_blocks => 0,
comment_prefix_length => 3,
keywords => {},
);
sub new {
my ( $class, $args ) = @_;
croak "Error: Class method new() can't be called on an object"
if ref $class;
croak "Error: Argument to new() must be a hash reference"
if $args && ref $args ne 'HASH';
my $self = { %default_args };
# pick up any specified arguments;
for my $key ( keys %default_args ) {
if ( exists $args->{$key} ) {
$self->{$key} = $args->{$key};
}
}
# load up a parser
$self->{parser} = Pod::WikiDoc::Parser->new();
return bless $self, $class;
}
### == {convert}
###
### my $pod_text = $parser->convert( $input_text );
###
### Given a string with valid Pod and/or wikidoc markup, filter/translate it to
### Pod. This is really just a wrapper around {filter} for working with
### strings rather than files, and provides similar behavior, including adding
### a 'Generated by' header.
sub convert {
my ($self, $input_string) = @_;
croak "Error: Argument to convert() must be a scalar"
if ( ref \$input_string ne 'SCALAR' );
my $input_fh = IO::String->new( $input_string );
my $output_fh = IO::String->new();
_filter_podfile( $self, $input_fh, $output_fh );
return ${ $output_fh->string_ref() };
}
### == {filter}
###
### $parser->filter( \%args );
###
### Filters from an input file for Pod and wikidoc, translating it to Pod
### and writing it to an output file. The output file will be prefixed with
### a 'Generated by' comment with the version of Pod::WikiDoc and timestamp,
### as required by [perlpodspec].
###
### {filter} takes a single, optional argument: a hash reference with
### the following optional keys:
###
### * {input}: a filename or filehandle to read from. Defaults to STDIN.
### * {output}: a filename or filehandle to write to. If given a filename
### and the file already exists, it will be clobbered. Defaults to STDOUT.
sub filter {
my ( $self, $args_ref ) = @_;
croak "Error: Argument to filter() must be a hash reference"
if defined $args_ref && ref($args_ref) ne 'HASH';
# setup input
my $input_fh;
if ( ! $args_ref->{input} ) {
$input_fh = \*STDIN;
}
elsif ( ( blessed $args_ref->{input} && $args_ref->{input}->isa('GLOB') )
|| ( ref $args_ref->{input} eq 'GLOB' )
|| ( ref \$args_ref->{input} eq 'GLOB' ) ) {
# filehandle or equivalent
$input_fh = $args_ref->{input};
}
elsif ( ref \$args_ref->{input} eq 'SCALAR' ) {
# filename
open( $input_fh, "<", $args_ref->{input} )
or croak "Error: Couldn't open input file '$args_ref->{input}': $!";
}
else {
croak "Error: 'input' parameter for filter() must be a filename or filehandle"
}
# setup output
my $output_fh;
if ( ! $args_ref->{output} ) {
$output_fh = \*STDOUT;
}
elsif ( ( blessed $args_ref->{output} && $args_ref->{output}->isa('GLOB') )
|| ( ref $args_ref->{output} eq 'GLOB' )
|| ( ref \$args_ref->{output} eq 'GLOB' ) ) {
# filehandle or equivalent
$output_fh = $args_ref->{output};
}
elsif ( ref \$args_ref->{output} eq 'SCALAR' ) {
# filename
open( $output_fh, ">", $args_ref->{output} )
or croak "Error: Couldn't open output file '$args_ref->{output}': $!";
}
else {
croak "Error: 'output' parameter for filter() must be a filename or filehandle"
}
_filter_podfile( $self, $input_fh, $output_fh );
return;
}
### == {format}
###
### my $pod_text = $parser->format( $wiki_text );
###
### Given a string with valid Pod and/or wikidoc markup, filter/translate it to
### Pod. Unlike {convert}, no 'Generated by' comment is added. This
### function is used internally by Pod::WikiDoc, but is being made available
### as a public method for users who want more granular control of the
### translation process or who want to convert wikidoc to Pod for other
### creative purposes using the Pod::WikiDoc engine.
sub format { ## no critic
my ($self, $wikitext) = @_;
croak "Error: Argument to format() must be a scalar"
if ( ref \$wikitext ne 'SCALAR' );
my $wiki_tree = $self->{parser}->WikiDoc( $wikitext ) ;
for my $node ( @$wiki_tree ) {
undef $node if ! ref $node;
}
return _wiki2pod( $wiki_tree, $self->{keywords} );
}
#--------------------------------------------------------------------------#
# PRIVATE METHODS
#--------------------------------------------------------------------------#
#--------------------------------------------------------------------------#
# _comment_block_regex
#
# construct a regex dynamically for the right comment prefix
#--------------------------------------------------------------------------#
sub _comment_block_regex {
my ( $self ) = @_;
my $length = $self->{comment_prefix_length};
return qr/\A#{$length}(?:\s(.*))?\z/ms;
}
#--------------------------------------------------------------------------#
# _input_iterator
#
# return an iterator that streams a filehandle. Action arguments:
# 'peek' -- lookahead at the next line without consuming it
# 'next' and 'drop' -- synonyms to consume and return the next line
#--------------------------------------------------------------------------#
sub _input_iterator {
my ($self, $fh) = @_;
my @head;
return sub {
my ($action) = @_;
if ($action eq 'peek') {
push @head, scalar <$fh> unless @head;
return $head[0];
}
elsif ( $action eq 'drop' || $action eq 'next' ) {
return shift @head if @head;
return scalar <$fh>;
}
else {
croak "Unrecognized iterator action '$action'\n";
}
}
}
#--------------------------------------------------------------------------#
# _exhaust_iterator
#
# needed to help abort processing
#--------------------------------------------------------------------------#
sub _exhaust_iterator {
my ($self, $iter) = @_;
1 while $iter->();
return;
}
#--------------------------------------------------------------------------#
# _output_iterator
#
# returns an output "iterator" that streams to a filehandle. Inputs
# are array refs of the form [ $FORMAT, @LINES ]. Format 'pod' is
# printed to the filehandle immediately. Format 'wikidoc' is accumulated
# until the next 'pod' then converted to wikidoc and printed to the file
# handle
#--------------------------------------------------------------------------#
sub _output_iterator {
my ($self, $fh) = @_;
my @wikidoc;
return sub {
my ($chunk) = @_;
if ($chunk eq 'flush') {
print {$fh} $self->format( join(q{}, splice(@wikidoc,0) ) )
if @wikidoc;
return;
}
return unless ref($chunk) eq 'ARRAY';
my ($format, @lines) = @$chunk;
if ( $format eq 'wikidoc' ) {
push @wikidoc, @lines;
}
elsif ( $format eq 'pod' ) {
print {$fh} $self->format( join(q{}, splice(@wikidoc,0) ) )
if @wikidoc;
print {$fh} @lines;
}
return;
}
}
#--------------------------------------------------------------------------#
# _filter_podfile()
#
# extract Pod from input and pass through to output, converting any wikidoc
# markup to Pod in the process
#--------------------------------------------------------------------------#
my $BLANK_LINE = qr{\A \s* \z}xms;
my $NON_BLANK_LINE = qr{\A \s* \S }xms;
my $FORMAT_LABEL = qr{:? [-a-zA-Z0-9_]+}xms;
my $POD_CMD = qr{\A =[a-zA-Z]+}xms;
my $BEGIN = qr{\A =begin \s+ ($FORMAT_LABEL) \s* \z}xms;
my $END = qr{\A =end \s+ ($FORMAT_LABEL) \s* \z}xms;
my $FOR = qr{\A =for \s+ ($FORMAT_LABEL) [ \t]* (.*) \z}xms;
my $POD = qr{\A =pod \s* \z}xms;
my $CUT = qr{\A =cut \s* \z}xms;
sub _filter_podfile {
my ($self, $input_fh, $output_fh) = @_;
# open output with tag and Pod marker
print $output_fh
"# Generated by Pod::WikiDoc version $VERSION\n\n";
print $output_fh "=pod\n\n";
# setup iterators
my $in_iter = $self->_input_iterator( $input_fh );
my $out_iter = $self->_output_iterator( $output_fh );
# starting filter mode is code
$self->_filter_code( $in_iter, $out_iter );
$out_iter->('flush');
return;
}
#--------------------------------------------------------------------------#
# _filter_code
#
# we need a "cutting" flag -- if we got here from a =cut, then we return to
# caller ( pod or format ) when we see pod. Otherwise we're just starting
# and need to start a new pod filter when we see pod
#
# perlpodspec says starting Pod with =cut is an error and that we
# *must* halt parsing and *should* issue a warning. Here we might be
# far down the call stack and don't want to just return where the caller
# might continue processing. To avoid this, we exhaust the input first.
#--------------------------------------------------------------------------#
sub _filter_code {
my ($self, $in_iter, $out_iter, $cutting) = @_;
my $CBLOCK = _comment_block_regex($self);
CODE: while ( defined( my $peek = $in_iter->('peek') ) ) {
$peek =~ $CBLOCK && do {
$self->_filter_cblock( $in_iter, $out_iter );
next CODE;
};
$peek =~ $CUT && do {
warn "Can't start Pod with '$peek'\n";
$self->_exhaust_iterator( $in_iter );
last CODE;
};
$peek =~ $POD_CMD && do {
last CODE if $cutting;
$self->_filter_pod( $in_iter, $out_iter );
next CODE;
};
do { $in_iter->('drop') };
}
return;
}
#--------------------------------------------------------------------------#
# _filter_pod
#
# Pass through lines to the output iterators, but flag wikidoc lines
# differently so that they can be converted on output
#
# If we find an =end that is out of order, perlpodspec says we *must* warn
# and *may* halt. Instead of halting, we return to the caller in the
# hopes that an earlier format might match this =end.
#--------------------------------------------------------------------------#
sub _filter_pod {
my ($self, $in_iter, $out_iter) = @_;
my @format = (); # no format to start
# process the pod block -- recursing as necessary
LINE: while ( defined( my $peek = $in_iter->('peek') ) ) {
$peek =~ $POD && do {
$in_iter->('drop');
next LINE;
};
$peek =~ $CUT && do {
$in_iter->('drop');
$self->_filter_code( $in_iter, $out_iter, 1 );
next LINE;
};
$peek =~ $FOR && do {
$self->_filter_for( $in_iter, $out_iter );
next LINE;
};
$peek =~ $END && do {
if ( ! @format ) {
warn "Error: '$peek' doesn't match any '=begin $1'\n";
$in_iter->('drop');
next LINE;
}
elsif ( $format[-1] ne $1 ) {
warn "Error: '$peek' doesn't match '=begin $format[-1]'\n";
pop @format; # try an earlier format
redo LINE;
}
elsif ( $format[-1] eq 'wikidoc' ) {
pop @format;
$in_iter->('drop');
next LINE;
}
else {
pop @format;
# and let it fall through to the output iterator
}
};
$peek =~ $BEGIN && do {
if ( $1 eq 'wikidoc' ) {
push @format, 'wikidoc';
$in_iter->('drop');
next LINE;
}
else {
push @format, $1;
# and let it fall through to the output iterator
}
};
do {
my $out_type =
( @format && $format[-1] eq 'wikidoc' ) ? 'wikidoc' : 'pod' ;
$out_iter->( [ $out_type, $in_iter->('next') ] )
};
}
return;
}
#--------------------------------------------------------------------------#
# _filter_for
#--------------------------------------------------------------------------#
sub _filter_for {
my ($self, $in_iter, $out_iter) = @_;
my $for_line = $in_iter->('next');
my ($format, $rest) = $for_line =~ $FOR;
$rest ||= "\n";
my @lines = ( $format eq 'wikidoc' ? $rest : $for_line );
LINE: while ( defined( my $peek = $in_iter->('peek') ) ) {
$peek =~ $BLANK_LINE && do {
last LINE;
};
do {
push @lines, $in_iter->('next');
};
}
if ($format eq 'wikidoc' ) {
$in_iter->('drop'); # wikidoc will append \n
}
else {
push @lines, $in_iter->('next');
}
my $out_type = $format eq 'wikidoc' ? 'wikidoc' : 'pod' ;
$out_iter->( [ $out_type, @lines ] );
return;
}
#--------------------------------------------------------------------------#
# _filter_cblock
#--------------------------------------------------------------------------#
sub _filter_cblock {
my ($self, $in_iter, $out_iter) = @_;
my @lines = ($1 ? $1 : "\n"); ## no critic
$in_iter->('next');
my $CBLOCK = _comment_block_regex($self);
LINE: while ( defined( my $peek = $in_iter->('peek') ) ) {
last LINE if $peek !~ $CBLOCK;
push @lines, ($1 ? $1 : "\n");
$in_iter->('next');
}
$out_iter->( [ 'wikidoc', @lines ] ) if $self->{comment_blocks};
return;
}
#--------------------------------------------------------------------------#
# Translation functions and tables
#--------------------------------------------------------------------------#
#--------------------------------------------------------------------------#
# Tables for formatting
#--------------------------------------------------------------------------#
# Used in closure for counting numbered lists
my $numbered_bullet;
# Text to print at start of entity from parse tree, or a subroutine
# to generate the text programmatically
my %opening_of = (
Paragraph => q{},
Unordered_List => "=over\n\n",
Ordered_List => sub { $numbered_bullet = 1; return "=over\n\n" },
Preformat => q{},
Header => sub {
my $node = shift;
my $level = $node->{level} > 4
? 4 : $node->{level};
return "=head$level "
},
Bullet_Item => "=item *\n\n",
Numbered_Item => sub {
return "=item " . $numbered_bullet++
. ".\n\n"
},
Indented_Line => q{ },
Plain_Line => q{},
Empty_Line => q{ },
Parens => "(",
RegularText => q{},
EscapedChar => q{},
WhiteSpace => q{},
InlineCode => "C<<< ",
BoldText => 'B<',
ItalicText => 'I<',
KeyWord => q{},
LinkContent => 'L<',
LinkLabel => q{},
LinkTarget => q{},
);
# Text to print at end of entity from parse tree, or a subroutine
# to generate the text programmatically
my %closing_of = (
Paragraph => "\n",
Unordered_List => "=back\n\n",
Ordered_List => "=back\n\n",
Preformat => "\n",
Header => "\n\n",
Bullet_Item => "\n\n",
Numbered_Item => "\n\n",
Indented_Line => "\n",
Plain_Line => "\n",
Empty_Line => "\n",
Parens => ")",
RegularText => q{},
EscapedChar => q{},
WhiteSpace => q{},
InlineCode => " >>>",
BoldText => ">",
ItalicText => ">",
KeyWord => q{},
LinkContent => q{>},
LinkLabel => q{|},
LinkTarget => q{},
);
# Subroutine to handle actual raw content from different node types
# from the parse tree
my %content_handler_for = (
RegularText => \&_escape_pod,
Empty_Line => sub { q{} },
KeyWord => \&_keyword_expansion,
);
# Table of character to E<> code conversion
my %escape_code_for = (
q{>} => "E<gt>",
q{<} => "E<lt>",
q{|} => "E<verbar>",
q{/} => "E<sol>",
);
# List of characters that need conversion
my $specials = join q{}, keys %escape_code_for;
#--------------------------------------------------------------------------#
# _escape_pod()
#
# After removing backslash escapes from a text string, translates characters
# that must be escaped in Pod <, >, |, and / to their Pod E<> code equivalents
#
#--------------------------------------------------------------------------#
sub _escape_pod {
my $node = shift;
my $input_text = $node->{content};
# remove backslash escaping
$input_text =~ s{ \\(.) }
{$1}gxms;
# replace special symbols with corresponding escape code
$input_text =~ s{ ( [$specials] ) }
{$escape_code_for{$1}}gxms;
return $input_text;
}
#--------------------------------------------------------------------------#
# _keyword_expansion
#
# Given a keyword, return the corresponding value from the keywords
# hash or the keyword itself
#--------------------------------------------------------------------------#
sub _keyword_expansion {
my ($node, $keywords) = @_;
my $key = $node->{content};
my $value = $keywords->{$key};
return defined $value ? $value : q{%%} . $key . q{%%} ;
}
#--------------------------------------------------------------------------#
# _translate_wikidoc()
#
# given an array of wikidoc lines, joins them and runs them through
# the formatter
#--------------------------------------------------------------------------#
sub _translate_wikidoc {
my ( $self, $wikidoc_ref ) = @_;
return $self->format( join q{}, @$wikidoc_ref );
}
#--------------------------------------------------------------------------#
# _wiki2pod()
#
# recursive function that walks a Pod::WikiDoc::Parser tree and generates
# a string with the corresponding Pod
#--------------------------------------------------------------------------#
sub _wiki2pod {
my ($nodelist, $keywords, $insert_space) = @_;
my $result = q{};
for my $node ( @$nodelist ) {
# XXX print "$node\n" if ref $node ne 'HASH';
my $opening = $opening_of{ $node->{type} };
my $closing = $closing_of{ $node->{type} };
$result .= ref $opening eq 'CODE' ? $opening->($node) : $opening;
if ( ref $node->{content} eq 'ARRAY' ) {
$result .= _wiki2pod(
$node->{content},
$keywords,
$node->{type} eq 'Preformat' ? 1 : 0
);
}
else {
my $handler = $content_handler_for{ $node->{type} };
$result .= defined $handler
? $handler->( $node, $keywords ) : $node->{content}
;
}
$result .= ref $closing eq 'CODE' ? $closing->($node) : $closing;
}
return $result;
}
1; #this line is important and will help the module return a true value
__END__