MetaTrans::SeznamCz - MetaTrans plug-in for L<http://slovnik.seznam.cz/>


MetaTrans documentation Contained in the MetaTrans distribution.

Index


Code Index:

NAME

Top

MetaTrans::SeznamCz - MetaTrans plug-in for http://slovnik.seznam.cz/

CONSTRUCTOR METHODS

Top

MetaTrans::SeznamCz->new(%options)

This method constructs a new MetaTrans::SeznamCz object and returns it. All %options are passed to MetaTrans::Base->new. The method also sets supported translation directions and the host_server attribute.

METHODS

Top

Methods are inherited from MetaTrans::Base. Following methods are overriden:

$plugin->create_request($expression, $src_lang_code, $dest_lang_code)

Create and return a HTTP::Request object to be used for retrieving translation of the $expression from $src_lang_code language to $dest_lang_code language.

$plugin->process_response($contents, $src_lang_code, $dest_lang_code)

Process the server response contents. Return the result of the translation in an array of following form:

    (expression_1, translation_1, expression_2, translation_2, ...)

BUGS

Top

Please report any bugs or feature requests to bug-metatrans@rt.cpan.org, or through the web interface at http://rt.cpan.org. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.

AUTHOR

Top

Jan Pomikalek, <xpomikal@fi.muni.cz>

COPYRIGHT & LICENSE

Top

SEE ALSO

Top

MetaTrans, MetaTrans::Base, MetaTrans::Languages, HTTP::Request, URI::Escape


MetaTrans documentation Contained in the MetaTrans distribution.
package MetaTrans::SeznamCz;

use strict;
use warnings;
use vars qw($VERSION @ISA);
use MetaTrans::Base;

use HTTP::Request;
use URI::Escape;

$VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d", @r };
@ISA     = qw(MetaTrans::Base);

sub new
{
    my $class   = shift;
    my %options = @_;

    $options{host_server} = "slovnik.seznam.cz"
        unless (defined $options{host_server});

    my $self = new MetaTrans::Base(%options);
    $self = bless $self, $class;

    $self->set_languages("cze", "eng", "ger", "fre", "spa", "ita", "rus");

    $self->set_dir_1_to_all("cze");
    $self->set_dir_all_to_1("cze");

    return $self;
}

sub create_request
{
    my $self           = shift;
    my $expression     = shift;
    my $src_lang_code  = shift;
    my $dest_lang_code = shift;

    my %table = (
        cze => "cz",
        eng => "en",
        ger => "de",
        fre => "fr",
        spa => "es",
        ita => "it",
        rus => "ru",
    );

    my $query =
        'http://slovnik.seznam.cz/?' .
        "q=" . uri_escape($expression) .
        "&lang=" . $table{$src_lang_code} . "_" . $table{$dest_lang_code};
    my $request = HTTP::Request->new(GET => $query);

    return $request;
}

sub process_response
{
    my $self           = shift;
    my $contents       = shift;
    my $src_lang_code  = shift;
    my $dest_lang_code = shift;

    my @result;
    while ($contents =~ m|
                <tr>
                \s*
                <td\sclass="word">
                \s*
                (.*?)
                \s*
                </td>
                \s*
                <td\sclass="translated">
                (.*?)
                </td>
                \s*
                </tr>
        |gsix)
    {
        my $expr  = _get_expr($1);
        my @trans = _get_trans($2);

        $expr = _normalize_german($expr)
            if $src_lang_code eq 'ger';

        foreach my $trans (@trans) {

            $trans = _normalize_german($trans)
                if $dest_lang_code eq 'ger';

            push @result, ($expr, $trans);
        }
    }

    return @result;
}

sub _get_expr
{
    my $string = shift;
    $string =~ s/<img[^>]+>//g;
    $string =~ s/<a\s+href="[^>]+"><\/a>//g;
    if ($string =~ m/<a\s+href="[^>]+">(.*?)<\/a>/)
    {
        return $1;
    }
    else
    {
        return '';
    }
}

sub _get_trans
{
    my $string = shift;
    $string =~ s/<br\s*\/>//g;
    $string =~ s/<img[^>]+>//g;
    $string =~ s/<a\s+href="[^>]+"><\/a>//g;
    $string =~ s/\s{2,}/ /g;
    my @result;
    while ($string =~ m/<a\s+href="[^>]+">(.*?)<\/a>/gimx)
    {
        push @result, $1;
    }
    return @result;
}

# normalize german article: Hund, r -> Hund; r
sub _normalize_german
{
    my $expr = shift;

    # normalize german article: Hund (der) -> Hund; r
    $expr = $1 . "; " . substr($2, 2, 1)
        if $expr =~ m/^(.*?),\s+(der|die|das)$/;

    return $expr;
}

1;

__END__