Pod::WikiDoc


Pod-WikiDoc documentation Contained in the Pod-WikiDoc distribution.

Index


Code Index:


Pod-WikiDoc documentation Contained in the Pod-WikiDoc distribution.
package Pod::WikiDoc;
use strict;
use warnings;
use vars qw($VERSION );
$VERSION     = '0.18';

use 5.006;
use Carp;
use IO::String;
use Scalar::Util qw( blessed );
use Pod::WikiDoc::Parser;

#--------------------------------------------------------------------------#
# PREAMBLE DOCUMENTATION
#--------------------------------------------------------------------------#

#--------------------------------------------------------------------------#
# PUBLIC METHODS
#--------------------------------------------------------------------------#

### == {new}
### 
###     $parser = Pod::WikiDoc->new( \%args );
### 
### Constructor for a new Pod::WikiDoc object.  It takes a single, optional 
### argument: a hash reference with the following optional keys:
###
### * {comment_blocks}: if true, Pod::WikiDoc will scan for wikidoc in comment
### blocks.  Default is false.
### * {comment_prefix_length}: the number of leading sharp (#) symbols to 
### denote a comment block.  Default is 3.
### * {keywords}: a hash reference with keywords and values for keyword
### substitution

my %default_args = (
    comment_blocks         => 0,
    comment_prefix_length  => 3,
    keywords               => {},
);

sub new {
    my ( $class, $args ) = @_;

    croak "Error: Class method new() can't be called on an object"
        if ref $class;

    croak "Error: Argument to new() must be a hash reference"
        if $args && ref $args ne 'HASH';
        
    my $self = { %default_args };

    # pick up any specified arguments;
    for my $key ( keys %default_args ) {
        if ( exists $args->{$key} ) {
            $self->{$key} = $args->{$key};
        }
    }

    # load up a parser 
    $self->{parser} = Pod::WikiDoc::Parser->new();
    
    return bless $self, $class;
}

### == {convert}
### 
###     my $pod_text = $parser->convert( $input_text );
### 
### Given a string with valid Pod and/or wikidoc markup, filter/translate it to
### Pod.  This is really just a wrapper around {filter} for working with
### strings rather than files, and provides similar behavior, including adding
### a 'Generated by' header.

sub convert {
    my ($self, $input_string) = @_;

    croak "Error: Argument to convert() must be a scalar"
        if ( ref \$input_string ne 'SCALAR' );
        
    my $input_fh = IO::String->new( $input_string );
    my $output_fh = IO::String->new();
    _filter_podfile( $self, $input_fh, $output_fh );
    
    return ${ $output_fh->string_ref() };
}

### == {filter}
### 
###     $parser->filter( \%args );
### 
### Filters from an input file for Pod and wikidoc, translating it to Pod 
### and writing it to an output file.  The output file will be prefixed with
### a 'Generated by' comment with the version of Pod::WikiDoc and timestamp,
### as required by [perlpodspec].
###
### {filter} takes a single, optional argument: a hash reference with 
### the following optional keys:
###
### * {input}: a filename or filehandle to read from. Defaults to STDIN.  
### * {output}: a filename or filehandle to write to.  If given a filename
### and the file already exists, it will be clobbered. Defaults to STDOUT.

sub filter {
    my ( $self, $args_ref ) = @_;
    
    croak "Error: Argument to filter() must be a hash reference"
        if defined $args_ref && ref($args_ref) ne 'HASH';
    # setup input
    my $input_fh;
    if ( ! $args_ref->{input} ) {
        $input_fh = \*STDIN;
    }
    elsif ( ( blessed $args_ref->{input} && $args_ref->{input}->isa('GLOB') )
         || ( ref $args_ref->{input}  eq 'GLOB' ) 
         || ( ref \$args_ref->{input} eq 'GLOB' ) ) {
        # filehandle or equivalent
        $input_fh = $args_ref->{input};
    } 
    elsif ( ref \$args_ref->{input} eq 'SCALAR' ) {
        # filename
        open( $input_fh, "<", $args_ref->{input} )
            or croak "Error: Couldn't open input file '$args_ref->{input}': $!";
    }
    else {
        croak "Error: 'input' parameter for filter() must be a filename or filehandle"
    }
    
    # setup output
    my $output_fh;
    if ( ! $args_ref->{output} ) {
        $output_fh = \*STDOUT;
    }
    elsif ( ( blessed $args_ref->{output} && $args_ref->{output}->isa('GLOB') )
         || ( ref $args_ref->{output}  eq 'GLOB' ) 
         || ( ref \$args_ref->{output} eq 'GLOB' ) ) {
        # filehandle or equivalent
        $output_fh = $args_ref->{output};
    } 
    elsif ( ref \$args_ref->{output} eq 'SCALAR' ) {
        # filename
        open( $output_fh, ">", $args_ref->{output} )
            or croak "Error: Couldn't open output file '$args_ref->{output}': $!";
    }
    else {
        croak "Error: 'output' parameter for filter() must be a filename or filehandle"
    }
    
    _filter_podfile( $self, $input_fh, $output_fh );
    return;
}

### == {format}
###
###     my $pod_text = $parser->format( $wiki_text );
### 
### Given a string with valid Pod and/or wikidoc markup, filter/translate it to
### Pod. Unlike {convert}, no 'Generated by' comment is added.  This 
### function is used internally by Pod::WikiDoc, but is being made available
### as a public method for users who want more granular control of the 
### translation process or who want to convert wikidoc to Pod for other
### creative purposes using the Pod::WikiDoc engine.

sub format { ## no critic
    my ($self, $wikitext) = @_;
    
    croak "Error: Argument to format() must be a scalar"
        if ( ref \$wikitext ne 'SCALAR' );
        
    my $wiki_tree  = $self->{parser}->WikiDoc( $wikitext ) ;
    for my $node ( @$wiki_tree ) {
        undef $node if ! ref $node;
    }

    return _wiki2pod( $wiki_tree, $self->{keywords} );
}

#--------------------------------------------------------------------------#
# PRIVATE METHODS
#--------------------------------------------------------------------------#

#--------------------------------------------------------------------------#
# _comment_block_regex
#
# construct a regex dynamically for the right comment prefix
#--------------------------------------------------------------------------#

sub _comment_block_regex {
    my ( $self ) = @_;
    my $length = $self->{comment_prefix_length};
    return qr/\A#{$length}(?:\s(.*))?\z/ms;
}

#--------------------------------------------------------------------------#
# _input_iterator
#
# return an iterator that streams a filehandle. Action arguments:
#     'peek' -- lookahead at the next line without consuming it
#     'next' and 'drop' -- synonyms to consume and return the next line
#--------------------------------------------------------------------------#

sub _input_iterator {
    my ($self, $fh) = @_;
    my @head;
    return sub {
        my ($action) = @_;
        if ($action eq 'peek') {
            push @head, scalar <$fh> unless @head;
            return $head[0];
        }
        elsif ( $action eq 'drop' || $action eq 'next' ) {
            return shift @head if @head;
            return scalar <$fh>;
        }
        else {
            croak "Unrecognized iterator action '$action'\n";
        }
    }
}

#--------------------------------------------------------------------------#
# _exhaust_iterator
#
# needed to help abort processing
#--------------------------------------------------------------------------#

sub _exhaust_iterator {
    my ($self, $iter) = @_;
    1 while $iter->();
    return;
}

#--------------------------------------------------------------------------#
# _output_iterator
#
# returns an output "iterator" that streams to a filehandle.  Inputs
# are array refs of the form [ $FORMAT, @LINES ].  Format 'pod' is 
# printed to the filehandle immediately.  Format 'wikidoc' is accumulated
# until the next 'pod' then converted to wikidoc and printed to the file
# handle
#--------------------------------------------------------------------------#

sub _output_iterator {
    my ($self, $fh) = @_;
    my @wikidoc;
    return sub {
        my ($chunk) = @_;
        if ($chunk eq 'flush') {
            print {$fh} $self->format( join(q{}, splice(@wikidoc,0) ) )
                if @wikidoc;
            return;
        }
        return unless ref($chunk) eq 'ARRAY';
        my ($format, @lines) = @$chunk;
        if ( $format eq 'wikidoc' ) {
            push @wikidoc, @lines;
        }
        elsif ( $format eq 'pod' ) {
            print {$fh} $self->format( join(q{}, splice(@wikidoc,0) ) )
                if @wikidoc;
            print {$fh} @lines;
        }
        return;
    }
}

#--------------------------------------------------------------------------#
# _filter_podfile() 
#
# extract Pod from input and pass through to output, converting any wikidoc
# markup to Pod in the process
#--------------------------------------------------------------------------#

my $BLANK_LINE = qr{\A \s* \z}xms;
my $NON_BLANK_LINE = qr{\A \s* \S }xms;
my $FORMAT_LABEL = qr{:? [-a-zA-Z0-9_]+}xms;
my $POD_CMD = qr{\A =[a-zA-Z]+}xms;
my $BEGIN = qr{\A =begin \s+ ($FORMAT_LABEL)  \s* \z}xms;
my $END   = qr{\A =end   \s+ ($FORMAT_LABEL)  \s* \z}xms;
my $FOR   = qr{\A =for   \s+ ($FORMAT_LABEL)  [ \t]* (.*) \z}xms;
my $POD   = qr{\A =pod                          \s* \z}xms;
my $CUT   = qr{\A =cut                          \s* \z}xms;

sub _filter_podfile {
    my ($self, $input_fh, $output_fh) = @_;

    # open output with tag and Pod marker
    print $output_fh
          "# Generated by Pod::WikiDoc version $VERSION\n\n";
    print $output_fh "=pod\n\n";
    
    # setup iterators
    my $in_iter = $self->_input_iterator( $input_fh );
    my $out_iter = $self->_output_iterator( $output_fh );

    # starting filter mode is code
    $self->_filter_code( $in_iter, $out_iter );
    $out_iter->('flush');

    return;
}

#--------------------------------------------------------------------------#
# _filter_code
#
# we need a "cutting" flag -- if we got here from a =cut, then we return to 
# caller ( pod or format ) when we see pod. Otherwise we're just starting
# and need to start a new pod filter when we see pod
#
# perlpodspec says starting Pod with =cut is an error and that we 
# *must* halt parsing and *should* issue a warning. Here we might be
# far down the call stack and don't want to just return where the caller
# might continue processing.  To avoid this, we exhaust the input first.
#--------------------------------------------------------------------------#

sub _filter_code {
    my ($self, $in_iter, $out_iter, $cutting) = @_;
    my $CBLOCK = _comment_block_regex($self);
    CODE: while ( defined( my $peek = $in_iter->('peek') ) ) {
        $peek =~ $CBLOCK && do {
            $self->_filter_cblock( $in_iter, $out_iter );
            next CODE;
        };
        $peek =~ $CUT && do {
            warn "Can't start Pod with '$peek'\n";
            $self->_exhaust_iterator( $in_iter );
            last CODE;
        };
        $peek =~ $POD_CMD && do {
            last CODE if $cutting;
            $self->_filter_pod( $in_iter, $out_iter );
            next CODE;
        };
        do { $in_iter->('drop') };
    }
    return;
}

#--------------------------------------------------------------------------#
# _filter_pod
#
# Pass through lines to the output iterators, but flag wikidoc lines 
# differently so that they can be converted on output
#
# If we find an =end that is out of order, perlpodspec says we *must* warn
# and *may* halt.  Instead of halting, we return to the caller in the
# hopes that an earlier format might match this =end.
#--------------------------------------------------------------------------#

sub _filter_pod {
    my ($self, $in_iter, $out_iter) = @_;
    my @format = (); # no format to start
    # process the pod block -- recursing as necessary
    LINE: while ( defined( my $peek = $in_iter->('peek') ) ) {
        $peek =~ $POD && do {
            $in_iter->('drop');
            next LINE;
        };
        $peek =~ $CUT && do { 
            $in_iter->('drop');
            $self->_filter_code( $in_iter, $out_iter, 1 );
            next LINE;
        };
        $peek =~ $FOR && do {
            $self->_filter_for( $in_iter, $out_iter );
            next LINE;
        };
        $peek =~ $END && do {
            if ( ! @format ) {
                warn "Error: '$peek' doesn't match any '=begin $1'\n";
                $in_iter->('drop');
                next LINE;
            }
            elsif ( $format[-1] ne $1 ) {
                warn "Error: '$peek' doesn't match '=begin $format[-1]'\n";
                pop @format; # try an earlier format
                redo LINE;
            }
            elsif ( $format[-1] eq 'wikidoc' ) {
                pop @format;
                $in_iter->('drop');
                next LINE;
            }
            else {
                pop @format;
                # and let it fall through to the output iterator
            }
        };
        $peek =~ $BEGIN && do {
            if ( $1 eq 'wikidoc' ) {
                push @format, 'wikidoc';
                $in_iter->('drop');
                next LINE;
            }
            else {
                push @format, $1;
                # and let it fall through to the output iterator
            }
        };
        do { 
            my $out_type = 
                ( @format && $format[-1] eq 'wikidoc' ) ? 'wikidoc' : 'pod' ; 
            $out_iter->( [ $out_type, $in_iter->('next') ] ) 
        };
    }
    return;
}

#--------------------------------------------------------------------------#
# _filter_for
#--------------------------------------------------------------------------#

sub _filter_for {
    my ($self, $in_iter, $out_iter) = @_;
    my $for_line = $in_iter->('next');
    my ($format, $rest) = $for_line =~ $FOR;
    $rest ||= "\n";

    my @lines = ( $format eq 'wikidoc' ? $rest : $for_line );

    LINE: while ( defined( my $peek = $in_iter->('peek') ) ) {
        $peek =~ $BLANK_LINE && do {
            last LINE;
        };
        do {
            push @lines, $in_iter->('next');
        };
    }
    if ($format eq 'wikidoc' ) {
        $in_iter->('drop'); # wikidoc will append \n
    }
    else {
        push @lines, $in_iter->('next');
    }
    my $out_type =  $format eq 'wikidoc' ? 'wikidoc' : 'pod' ; 
    $out_iter->( [ $out_type, @lines ] ); 
    return;
}

#--------------------------------------------------------------------------#
# _filter_cblock
#--------------------------------------------------------------------------#

sub _filter_cblock {
    my ($self, $in_iter, $out_iter) = @_;
    my @lines = ($1 ? $1 : "\n"); ## no critic
    $in_iter->('next');
    my $CBLOCK = _comment_block_regex($self);
    LINE: while ( defined( my $peek = $in_iter->('peek') ) ) {
        last LINE if $peek !~ $CBLOCK;
        push @lines, ($1 ? $1 : "\n");
        $in_iter->('next');
    }
    $out_iter->( [ 'wikidoc', @lines ] ) if $self->{comment_blocks};
    return;
}


#--------------------------------------------------------------------------#
# Translation functions and tables
#--------------------------------------------------------------------------#

#--------------------------------------------------------------------------#
# Tables for formatting
#--------------------------------------------------------------------------#

# Used in closure for counting numbered lists
my $numbered_bullet;

# Text to print at start of entity from parse tree, or a subroutine
# to generate the text programmatically
my %opening_of = (
    Paragraph           =>  q{},
    Unordered_List      =>  "=over\n\n",
    Ordered_List        =>  sub { $numbered_bullet = 1; return "=over\n\n" },
    Preformat           =>  q{},
    Header              =>  sub { 
                                my $node = shift; 
                                my $level = $node->{level} > 4 
                                    ? 4 : $node->{level};
                                return "=head$level "
                            },
    Bullet_Item         =>  "=item *\n\n",
    Numbered_Item       =>  sub { 
                                return  "=item " . $numbered_bullet++ 
                                        . ".\n\n" 
                            },
    Indented_Line       =>  q{ },
    Plain_Line          =>  q{},
    Empty_Line          =>  q{ },
    Parens              =>  "(",
    RegularText         =>  q{},
    EscapedChar         =>  q{},
    WhiteSpace          =>  q{},
    InlineCode          =>  "C<<< ",
    BoldText            =>  'B<',
    ItalicText          =>  'I<',
    KeyWord             =>  q{},
    LinkContent         =>  'L<',
    LinkLabel           =>  q{},
    LinkTarget          =>  q{},
);

# Text to print at end of entity from parse tree, or a subroutine
# to generate the text programmatically
my %closing_of = (
    Paragraph           =>  "\n",
    Unordered_List      =>  "=back\n\n",
    Ordered_List        =>  "=back\n\n",
    Preformat           =>  "\n",
    Header              =>  "\n\n",
    Bullet_Item         =>  "\n\n",
    Numbered_Item       =>  "\n\n",
    Indented_Line       =>  "\n",
    Plain_Line          =>  "\n",
    Empty_Line          =>  "\n",
    Parens              =>  ")",
    RegularText         =>  q{},
    EscapedChar         =>  q{},
    WhiteSpace          =>  q{},
    InlineCode          =>  " >>>",
    BoldText            =>  ">",
    ItalicText          =>  ">",
    KeyWord             =>  q{},
    LinkContent         =>  q{>},
    LinkLabel           =>  q{|},
    LinkTarget          =>  q{},
);

# Subroutine to handle actual raw content from different node types
# from the parse tree
my %content_handler_for = (
    RegularText         =>  \&_escape_pod, 
    Empty_Line          =>  sub { q{} },
    KeyWord             =>  \&_keyword_expansion,
);

# Table of character to E<> code conversion
my %escape_code_for = (
    q{>} =>  "E<gt>",
    q{<} =>  "E<lt>",
    q{|} =>  "E<verbar>",
    q{/} =>  "E<sol>",
);

# List of characters that need conversion
my $specials = join q{}, keys %escape_code_for;

#--------------------------------------------------------------------------#
# _escape_pod()
#
# After removing backslash escapes from a text string, translates characters
# that must be escaped in Pod <, >, |, and / to their Pod E<> code equivalents
#
#--------------------------------------------------------------------------#

sub _escape_pod { 
    
    my $node = shift; 
    
    my $input_text  = $node->{content};
    
    # remove backslash escaping
    $input_text =~ s{ \\(.) } 
                                        {$1}gxms;
    
    # replace special symbols with corresponding escape code
    $input_text =~ s{ ( [$specials] ) } 
                                        {$escape_code_for{$1}}gxms; 

    return $input_text; 
}

#--------------------------------------------------------------------------#
# _keyword_expansion
#
# Given a keyword, return the corresponding value from the keywords
# hash or the keyword itself
#--------------------------------------------------------------------------#

sub _keyword_expansion {
    my ($node, $keywords) = @_;
    my $key = $node->{content};
    my $value = $keywords->{$key};
    return defined $value ? $value : q{%%} . $key . q{%%} ;
}

    
#--------------------------------------------------------------------------#
# _translate_wikidoc()
#
# given an array of wikidoc lines, joins them and runs them through
# the formatter
#--------------------------------------------------------------------------#

sub _translate_wikidoc {
    my ( $self, $wikidoc_ref ) = @_;
    return $self->format( join q{}, @$wikidoc_ref );
}

#--------------------------------------------------------------------------#
# _wiki2pod()
#
# recursive function that walks a Pod::WikiDoc::Parser tree and generates
# a string with the corresponding Pod
#--------------------------------------------------------------------------#

sub _wiki2pod {
    my ($nodelist, $keywords, $insert_space) = @_;
    my $result = q{};
    for my $node ( @$nodelist ) {
        # XXX print "$node\n" if ref $node ne 'HASH';
        my $opening = $opening_of{ $node->{type} };
        my $closing = $closing_of{ $node->{type} };

        $result .= ref $opening eq 'CODE' ? $opening->($node) : $opening;
        if ( ref $node->{content} eq 'ARRAY' ) {
            $result .= _wiki2pod( 
                $node->{content}, 
                $keywords,
                $node->{type} eq 'Preformat' ? 1 : 0 
            );
        }
        else {
            my $handler = $content_handler_for{ $node->{type} };
            $result .= defined $handler 
                     ? $handler->( $node, $keywords ) : $node->{content}
            ;
        }
        $result .= ref $closing eq 'CODE' ? $closing->($node) : $closing;
    }
    return $result;
}

1; #this line is important and will help the module return a true value
__END__