PPI::HTML - Generate syntax-hightlighted HTML for Perl using PPI


PPI-HTML documentation Contained in the PPI-HTML distribution.

Index


Code Index:

NAME

Top

PPI::HTML - Generate syntax-hightlighted HTML for Perl using PPI

SYNOPSIS

Top

  use PPI;
  use PPI::HTML;

  # Load your Perl file
  my $Document = PPI::Document->load( 'script.pl' );

  # Create a reusable syntax highlighter
  my $Highlight = PPI::HTML->new( line_numbers => 1 );

  # Spit out the HTML
  print $Highlight->html( $Document );

DESCRIPTION

Top

PPI::HTML converts Perl documents into syntax highlighted HTML pages.

HISTORY

Top

PPI::HTML is the successor to the now-redundant PPI::Format::HTML.

While early on it was thought that the same formatting code might be able to be used for a variety of different types of things (ANSI and HTML for example) later developments with the here-doc code and the need for independantly written serializers meant that this idea had to be discarded.

In addition, the old module only made use of the Tokenizer, and had a pretty shit API to boot.

API Overview

The new module is much cleaner. Simply create an object with the options you want, pass PPI::Document objects to the html method, and you get strings of HTML that you can do whatever you want with.

METHODS

Top

new %args

The new constructor takes a simple set of key/value pairs to define the formatting options for the HTML.

page

Is the page option is enabled, the generator will wrap the generated HTML fragment in a basic but complete page.

line_numbers

At the present time, the only option available. If set to true, line numbers are added to the output.

colors | colours

For cases where you don't want to use an external stylesheet, you can provide colors as a hash reference where the keys are CSS classes (generally matching the token name) and the values are colours.

This allows basic colouring without the need for a whole stylesheet.

css

The css option lets you provide a custom CSS::Tiny object containing any CSS you want to apply to the page (if you are using page mode).

If both the colors and css options are used, the colour CSS entries will overwrite anything contained in the CSS::Tiny object. The object will also be cloned if it to be modified, to prevent destroying any CSS objects passed in.

Returns a new PPI::HTML object

css

The css accessor returns the CSS::Tiny object originally provided to the constructor.

html $Document | $file | \$source

The main method for the class, the html method takes a single PPI::Document object, or anything that can be turned into a PPI::Document via its new method, and returns a string of HTML formatted based on the arguments given to the PPI::HTML constructor.

Returns a string, or undef on error.

SUPPORT

Top

Bugs should always be submitted via the CPAN bug tracker

http://rt.cpan.org/NoAuth/ReportBug.html?Queue=PPI-HTML

For other issues, contact the maintainer

AUTHOR

Top

Adam Kennedy <adamk@cpan.org>

Funding provided by The Perl Foundation

SEE ALSO

Top

http://ali.as/, PPI

COPYRIGHT

Top


PPI-HTML documentation Contained in the PPI-HTML distribution.
package PPI::HTML;

use 5.005;
use strict;
use CSS::Tiny           ();
use PPI::Document       ();
use PPI::HTML::Fragment ();
use Params::Util '_HASH', '_INSTANCE';

use vars qw{$VERSION};
BEGIN {
	$VERSION = '1.08';
}





#####################################################################
# Constructor and Accessors

sub new {
	my $class = ref $_[0] ? ref shift : shift;
	my %args  = @_;

	# Create the basic object
	my $self = bless {
		line_numbers => !! $args{line_numbers},
		page         => !! $args{page},
		# colors     => undef,
		# css        => undef,
		}, $class;

	# Manually specify the class colours and custom CSS
	$args{colors}   = delete $args{colours} if $args{colours};
	$self->{colors} = $args{colors}         if _HASH($args{colors});
	$self->{css}    = $args{css}            if _INSTANCE($args{css}, 'CSS::Tiny');

	$self;
}

sub css { $_[0]->{css} }





#####################################################################
# Main Methods

sub html {
	my $self     = shift;
	my $Document = $self->_Document(shift) or return undef;

	# Build the basic set of fragments
	$self->_build_fragments($Document) or return undef;

	# Interleave the line numbers
	$self->_build_line_numbers or return undef;

	# Optimise
	$self->_optimize_fragments or return undef;

	# Merge and stringify the fragments
	$self->_build_html or return undef;

	# Return the final HTML
	delete $self->{html};
}

# Create the basic list of fragments
sub _build_fragments {
	my ($self, $Document) = @_;

	# Convert the list of tokens to a list of fragments
	$self->{fragments}      = [];
	$self->{heredoc_buffer} = undef;
	foreach my $Token ( $Document->tokens ) {
		# Find the Fragments for the token
		my @fragments = ();
		if ( _INSTANCE($Token, 'PPI::Token::HereDoc') ) {
			@fragments = $self->_heredoc_fragments($Token) or return undef;
		} else {
			@fragments = $self->_simple_fragments($Token) or return undef;
		}

		# Add the fragments
		foreach my $Fragment ( @fragments ) {
			$self->_add_fragment( $Fragment ) or return undef;
		}
	}

	# Are there any trailing heredoc lines to add?
	if ( $self->{heredoc_buffer} ) {
		# Unless the last line ends in a newline, add one
		unless ( $self->{fragments}->[-1]->ends_line ) {
			my $Fragment = PPI::HTML::Fragment->new( "\n" ) or return undef;
			push @{$self->{fragments}}, $Fragment;
		}

		# Add the remaining buffer lines
		push @{$self->{fragments}}, @{$self->{heredoc_buffer}};
	}

	# We don't need the heredoc buffer any more
	delete $self->{heredoc_buffer};

	1;
}

sub _simple_fragments {
	my ($self, $Token) = @_;

	# Split the token content into strings
	my @strings = grep { defined $_ and length $_ } split /(?<=\n)/, $Token->content;

	# Convert each string to a fragment
	my @fragments = ();
	my $css_class = $self->_css_class( $Token );
	foreach my $string ( @strings ) {
		my $Fragment = PPI::HTML::Fragment->new( $string, $css_class ) or return ();
		push @fragments, $Fragment;
	}

	@fragments;
}

sub _heredoc_fragments {
	my ($self, $Token) = @_;

	# First, create the heredoc content lines and add them
	# to the buffer
	foreach my $line ( $Token->heredoc ) {
		$self->_add_heredoc( $line,
			'heredoc_content' ) or return ();
	}

	# Add the terminator line
	$self->_add_heredoc( $Token->terminator . "\n",
		'heredoc_terminator' ) or return ();

	# Return a single fragment for the main content part
	my $Fragment = PPI::HTML::Fragment->new( $Token->content,
		$self->_css_class( $Token ) ) or return ();

	$Fragment;
}

sub _build_line_numbers {
	my $self = shift;
	return 1 unless $self->{line_numbers};

	# Find the width of the highest line number, so that
	# we can pad the line numbers
	my $max     = 1 + scalar map { $_->ends_line } @{$self->{fragments}};
	my $width   = length("$max");
	my $pattern = "\%${width}s: ";

	# Iterate over the existing array, and insert new line
	# fragments after each newline.
	my $line = 1;
	my @fragments = map {
		$_->ends_line
			? ($_, $self->_line_fragment( sprintf($pattern, ++$line) ))
			: ($_)
		} @{$self->{fragments}};

	# Add the fragment for line 1 to the beginning
	unshift @fragments, $self->_line_fragment( sprintf($pattern, 1) );

	$self->{fragments} = \@fragments;

	1;
}

sub _build_html {
	my $self = shift;

	# Iterate over the loop, stringifying and merging
	my $html = '';
	foreach my $Fragment ( @{$self->{fragments}} ) {
		$html .= $Fragment->html;
	}

	# Page wrap if needed
	if ( $self->{page} ) {
		my $css = $self->_css_html;

		$html = <<END_HTML;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN">
<html>
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
  <meta name="robots" content="noarchive">
$css
</head>
<body bgcolor="#FFFFFF" text="#000000"><pre>$html</pre></body>
</html>
END_HTML
	}

	# Replace the fragments array with the HTML
	$self->{html} = $html;
	delete $self->{fragments};

	1;
}

sub _optimize_fragments {
	my $self = shift;

	# Iterate through and do the simplest optimisation layer,
	# when is joining identical adjacent fragments.
	my $current = $self->{fragments};
	my @fragments = ( shift @$current );
	foreach my $Fragment ( @$current ) {
		if ( $Fragment->css and $fragments[-1]->css and $Fragment->css eq $fragments[-1]->css ) {
			$fragments[-1]->concat( $Fragment->string );
		} else {
			push @fragments, $Fragment;
		}
	}

	# Remove the class from all whitespace
	foreach my $Fragment ( @fragments ) {
		my $css = $Fragment->css or next;
		$Fragment->clear if $css eq 'whitespace';
	}

	# If we know what classes are coloured, strip the style
	# from everything that doesn't have a colour.
	if ( $self->{colors} ) {
		my $colors = $self->{colors};
		foreach my $Fragment ( @fragments ) {
			my $css = $Fragment->css or next;
			next if $colors->{$css};
			$Fragment->clear;
		}
	}

	# Overwrite the fragments list
	$self->{fragments} = \@fragments;

	1;
}

# For a set of colors, generate the relevant CSS
sub _css_html {
	my $self = shift;

	# Create and fill a CSS object
	my $css = $self->{css}
		? $self->{css}->clone
		: CSS::Tiny->new;
	foreach my $key ( sort keys %{$self->{colors}} ) {
		$css->{".$key"}->{color} = $self->{colors}->{$key};
	}

	keys %$css ? $css->html : '';
}





#####################################################################
# Support Methods

# Create a Document from anything we can
sub _Document {
	my $class = shift;
	_INSTANCE( $_[0], 'PPI::Document' )
		? $_[0]                        # Already a Document
		: PPI::Document->new( @_ ); # Make a Document
}

# Create a Fragment from anything we can
sub _Fragment {
	my $class = shift;
	_INSTANCE( $_[0], 'PPI::HTML::Fragment' )
		? $_[0] 
		: PPI::HTML::Fragment->new( @_ );
}

sub _add_fragment {
	my $self     = shift;
	my $Fragment = $self->_Fragment(@_) or return undef;

	# Add the fragment itself
	push @{$self->{fragments}}, $Fragment;

	# If the fragment ends a line, add
	# anything that is in the heredoc buffer.
	if ( $self->{heredoc_buffer} and $Fragment->ends_line ) {
		push @{$self->{fragments}}, @{$self->{heredoc_buffer}};
		$self->{heredoc_buffer} = undef;
	}

	1;
}

sub _add_heredoc {
	my $self     = shift;
	my $Fragment = $self->_Fragment(@_) or return undef;
	$self->{heredoc_buffer} ||= [];
	push @{$self->{heredoc_buffer}}, $Fragment;
	1;
}

sub _line_fragment {
	my ($self, $line) = @_;
	PPI::HTML::Fragment->new( $line, 'line_number' );
}

sub _css_class {
	my ($self, $Token) = @_;
	if ( $Token->isa('PPI::Token::Word') ) {
		# There are some words we can be very confident are
		# being used as keywords
		my $content = $Token->content;

		unless ( $Token->snext_sibling and $Token->snext_sibling->content eq '=>' ) {
			if ( $content eq 'sub' ) {
				return 'keyword';
			} elsif ( $content eq 'return' ) {
				return 'keyword';
			} elsif ( $content eq 'undef' ) {
				return 'core';
			} elsif ( $content eq 'shift' ) {
				return 'core';
			} elsif ( $content eq 'defined' ) {
				return 'core';
			}
		}

		my $parent = $Token->parent;
		if ( $parent->isa('PPI::Statement::Include') ) {
			if ( $content =~ /^(?:use|no)$/ ) {
				return 'keyword';
			}
			if ( $content eq $parent->pragma ) {
				return 'pragma';
			}
		} elsif ( $parent->isa('PPI::Statement::Variable') ) {
			if ( $content =~ /^(?:my|local|our)$/ ) {
				return 'keyword';
			}
		} elsif ( $parent->isa('PPI::Statement::Compound') ) {
			if ( $content =~ /^(?:if|else|elsif|unless|for|foreach|while|my)$/ ) {
				return 'keyword';
			}
		} elsif ( $parent->isa('PPI::Statement::Given') ) {
			if ( $content eq 'given' ) {
				return 'keyword';
			}
		} elsif ( $parent->isa('PPI::Statement::When') ) {
			if ( $content =~ /^(?:when|default)$/ ) {
				return 'keyword';
			}
		} elsif ( $parent->isa('PPI::Statement::Package') ) {
			if ( $content eq 'package' ) {
				return 'keyword';
			}
		} elsif ( $parent->isa('PPI::Statement::Scheduled') ) {
			return 'keyword';
		}
	}

	# Normal colouring
	my $css = lc ref $Token;
	$css =~ s/^.+:://;
	$css;
}

1;