WWW::Wikipedia::Entry - A class for representing a Wikipedia Entry


WWW-Wikipedia documentation Contained in the WWW-Wikipedia distribution.

Index


Code Index:

NAME

Top

WWW::Wikipedia::Entry - A class for representing a Wikipedia Entry

SYNOPSIS

Top

    my $wiki = WWW::Wikipedia->new();
    my $entry = $wiki->search( 'Perl' );
    print $entry->text();

    my $entry_es = $entry->language( 'es' );
    print $entry_es->text();

DESCRIPTION

Top

WWW::Wikipedia::Entry objects are usually created using the search() method on a WWW::Wikipedia object to search for a term. Once you've got an entry object you can then extract pieces of information from the entry using the following methods.

METHODS

Top

new()

You probably won't use this one, it's the constructor that is called behind the scenes with the correct arguments by WWW::Wikipedia::search().

text()

The brief text for the entry. This will provide the first paragraph of text; basically everything up to the first heading. Ordinarily this will be what you want to use. When there doesn't appear to be summary text you will be returned the fulltext instead.

If text() returns nothing then you probably are looking at a disambiguation entry, and should use related() to lookup more specific entries.

text_basic()

The same as text(), but not run through Text::Autoformat.

fulltext()

Returns the full text for the entry, which can be extensive.

fulltext_basic()

The same as fulltext(), but not run through Text::Autoformat.

title()

Returns a title of the entry.

categories()

Returns a list of categories which the entry is part of. So Perl is part of the Programming languages category.

headings()

Returns a list of headings used in the entry.

raw()

Returns the raw wikitext for the entry.

language()

With no parameters, it will return the current language of the entry. By specifying a two-letter language code, it will return the same entry in that language, if available.

languages()

Returns an array of two letter language codes denoting the languages in which this entry is available.

AUTHORS

Top

Ed Summers <ehs@pobox.com>

Brian Cassidy <bricas@cpan.org>

COPYRIGHT AND LICENSE

Top


WWW-Wikipedia documentation Contained in the WWW-Wikipedia distribution.
package WWW::Wikipedia::Entry;

use strict;
use warnings;
use Text::Autoformat;
use WWW::Wikipedia;

sub new {
    my ( $class, $raw, $src ) = @_;
    return if length( $raw ) == 0;
    my $self = bless {
        raw         => $raw,
        src         => $src,
        text        => '',
        fulltext    => '',
        cursor      => 0,
        related     => [],
        categories  => [],
        headings    => [],
        languages   => {},
        currentlang => ''
        },
        ref( $class ) || $class;
    $self->_parse();

    # store un-"pretty"-ed version of text
    $self->{ fulltext_basic } = $self->{ fulltext };
    $self->{ text_basic }     = $self->{ text };

    $self->{ fulltext } = _pretty( $self->{ fulltext } );
    $self->{ text }     = _pretty( $self->{ text } );
    return ( $self );
}

sub text {
    my $self = shift;
    return $self->{ text } if $self->{ text };
    return $self->fulltext();
}

sub text_basic {
    my $self = shift;
    return $self->{ text_basic } if $self->{ text_basic };
    return $self->fulltext_basic();
}

sub fulltext {
    my $self = shift;
    return $self->{ fulltext };
}

sub fulltext_basic {
    my $self = shift;
    return $self->{ fulltext_basic };
}


sub title {
    my $self = shift;
    return $self->{ title };
}

sub related {
    return ( @{ shift->{ related } } );
}

sub categories {
    return ( @{ shift->{ categories } } );
}

sub headings {
    return ( @{ shift->{ headings } } );
}

sub raw {
    my $self = shift;
    return $self->{ raw };
}

sub language {
    my $self = shift;
    my $lang = shift;

    return $self->{ currentlang } unless defined $lang;
    return undef unless exists $self->{ languages }->{ $lang };

    my $wiki = WWW::Wikipedia->new( language => $lang );
    return $wiki->search( $self->{ languages }->{ $lang } );
}

sub languages {
    my $self = shift;

    return keys %{ $self->{ languages } };
}

## messy internal routine for barebones parsing of wikitext

sub _parse {
    my $self = shift;
    my $raw  = $self->{ raw };
    my $src  = $self->{ src };

    # Add current language
    my ( $lang )  = ( $src =~ /http:\/\/(..)/ );
    my $title = ( split( /\//, $src ) )[ -1 ];

    if( $title =~ m{\?title=} ) {
        ( $title ) = $src =~ m{\?title=([^\&]+)};
        $title =~ s{_}{ }g;
    }

    $self->{ currentlang } = $lang;
    $self->{ languages }->{ $lang } = $title;
    $self->{ title } = $title;

    for (
        $self->{ cursor } = 0;
        $self->{ cursor } < length( $raw );
        $self->{ cursor }++
        )
    {

        pos( $raw ) = $self->{ cursor };

        ## [[ ... ]]
        if ( $raw =~ /\G\[\[ *(.*?) *\]\]/ ) {
            my $directive = $1;
            $self->{ cursor } += length( $& ) - 1;
            if ( $directive =~ /\:/ ) {
                my ( $type, $text ) = split /:/, $directive;
                if ( lc( $type ) eq 'category' ) {
                    push( @{ $self->{ categories } }, $text );
                }

                # language codes
                if ( length( $type ) == 2 and lc( $type ) eq $type ) {
                    $self->{ languages }->{ $type } = $text;
                }
            }
            elsif ( $directive =~ /\|/ ) {
                my ( $lookup, $name ) = split /\|/, $directive;
                $self->{ fulltext } .= $name;
                push( @{ $self->{ related } }, $lookup ) if $lookup !~ /^#/;
            }
            else {
                $self->{ fulltext } .= $directive;
                push( @{ $self->{ related } }, $directive );
            }
        }

        ## === heading 2 ===
        elsif ( $raw =~ /\G=== *(.*?) *===/ ) {
            ### don't bother storing these headings
            $self->{ fulltext } .= $1;
            $self->{ cursor } += length( $& ) - 1;
            next;
        }

        ## == heading 1 ==
        elsif ( $raw =~ /\G== *(.*?) *==/ ) {
            push( @{ $self->{ headings } }, $1 );
            $self->{ text } = $self->{ fulltext } if !$self->{ seenHeading };
            $self->{ seenHeading } = 1;
            $self->{ fulltext } .= $1;
            $self->{ cursor } += length( $& ) - 1;
            next;
        }

        ## '' italics ''
        elsif ( $raw =~ /\G'' *(.*?) *''/ ) {
            $self->{ fulltext } .= $1;
            $self->{ cursor } += length( $& ) - 1;
            next;
        }

        ## {{ disambig }}
        elsif ( $raw =~ /\G{{ *(.*?) *}}/ ) {
            ## ignore for now
            $self->{ cursor } += length( $& ) - 1;
            next;
        }

        else {
            $self->{ fulltext } .= substr( $raw, $self->{ cursor }, 1 );
        }
    }
}

sub _pretty {
    my $text = shift;

    # Text::Autoformat v1.13 chokes on strings that are one or more "\n"
    return '' if $text =~ m/^\n+$/;
    return autoformat(
        $text,
        {   left    => 0,
            right   => 80,
            justify => 'left',
            all     => 1
        }
    );
}

1;