MetaTrans::SmsCz - MetaTrans plug-in for L<http://slovniky.sms.cz/>


MetaTrans documentation Contained in the MetaTrans distribution.

Index


Code Index:

NAME

Top

MetaTrans::SmsCz - MetaTrans plug-in for http://slovniky.sms.cz/

CONSTRUCTOR METHODS

Top

MetaTrans::SmsCz->new(%options)

This method constructs a new MetaTrans::SmsCz object and returns it. All %options are passed to MetaTrans::Base->new. The method also sets supported translation directions and the host_server attribute.

METHODS

Top

Methods are inherited from MetaTrans::Base. Following methods are overriden:

$plugin->create_request($expression, $src_lang_code, $dest_lang_code)

Create and return a HTTP::Request object to be used for retrieving translation of the $expression from $src_lang_code language to $dest_lang_code language.

$plugin->process_response($contents, $src_lang_code, $dest_lang_code)

Process the server response contents. Return the result of the translation in an array of following form:

    (expression_1, translation_1, expression_2, translation_2, ...)

BUGS

Top

Please report any bugs or feature requests to bug-metatrans@rt.cpan.org, or through the web interface at http://rt.cpan.org. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.

AUTHOR

Top

Jan Pomikalek, <xpomikal@fi.muni.cz>

COPYRIGHT & LICENSE

Top

SEE ALSO

Top

MetaTrans, MetaTrans::Base, MetaTrans::Languages, HTTP::Request, URI::Escape


MetaTrans documentation Contained in the MetaTrans distribution.
package MetaTrans::SmsCz;

use strict;
use warnings;
use vars qw($VERSION @ISA);
use MetaTrans::Base qw(convert_to_utf8);

use Encode;
use HTTP::Request;

$VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d", @r };
@ISA     = qw(MetaTrans::Base);

sub new
{
    my $class   = shift;
    my %options = @_;

    $options{host_server} = "slovniky.sms.cz"
        unless (defined $options{host_server});

    my $self = new MetaTrans::Base(%options);
    $self = bless $self, $class;

    $self->set_languages("cze", "eng", "ger", "fre", "spa", "ita", "rus");

    $self->set_dir_1_to_all("cze");
    $self->set_dir_all_to_1("cze");

    return $self;
}

sub create_request
{
    my $self           = shift;
    my $expression     = shift;
    my $src_lang_code  = shift;
    my $dest_lang_code = shift;

    my %table = (
        cze => "cz",
        eng => "en",
        ger => "de",
        fre => "fr",
        spa => "es",
        ita => "it",
        rus => "ru",
    );

    # convert to Perl's internal UTF-8 format
    $expression = Encode::decode_utf8($expression)
        unless Encode::is_utf8($expression);
    
    # replace blanks with pluses (+)
    $expression =~ s/\s+/+/g;

    # convert to cp1250 character encoding (that's what server expects)
    $expression = Encode::encode("cp1250", lc $expression)
	if $src_lang_code ne 'rus';

    # do some server-specific character escapings
    $expression = &_my_escape($expression);

    my $query = 
        "http://slovniky.sms.cz/index.php?" .
        "P_id_kategorie=65456" .
        "&P_soubor=/slovniky/index.php" .
        "&send_data=1" .
        "&word=$expression" .
        "&bjvolba=" . $table{$src_lang_code} . "_" . $table{$dest_lang_code};
    my $request = HTTP::Request->new(GET => $query);

    return $request;
}

sub process_response
{
    my $self           = shift;
    my $contents       = shift;
    my $src_lang_code  = shift;
    my $dest_lang_code = shift;

    my @result;
    while ($contents =~ m|
                <a\s[^>]*>\s*([^<]*?)\s*</a>
                \s*</td>\s*<td\s[^>]*>
                \s*&nbsp;-&nbsp;
                \s*</td>\s*<td\s[^>]*>
                \s*<a\s[^>]*>\s*([^<]*?)\s*</a>
        |gsix)
    {
        # the output is in cp1250 character encoding with HTML entities,
        # let's convert it to UTF-8
        my $expr  = convert_to_utf8('cp1250', $1);
        my $trans = convert_to_utf8('cp1250', $2);

        $expr = &_normalize_german_article($expr)
            if $src_lang_code eq 'ger';

        $trans = &_normalize_german_article($trans)
            if $dest_lang_code eq 'ger';
        
        push @result, ($expr, $trans);
    }

    return @result;
}

# server specific character escaping
# it's really strange, so don't worry if you don't understand it
sub _my_escape
{
    my $unesc = shift;
    my $result;

    foreach my $char (split //, $unesc)
    {
        my $ord = ord($char);
        $result .= $ord>>4 == 0x43 ? sprintf('%%E%X', $ord & 0xf) :
                   $ord>>4 == 0x44 ? sprintf('%%F%X', $ord & 0xf) :
                   $ord    == 0x2B ? '+'                          :
                                     sprintf('%%%X', $ord);
    }

    return $result;
}

# der Hund -> Hund; r
sub _normalize_german_article
{
    my $expr = shift;
    my $expr_dec = decode_utf8($expr);
    
    $expr_dec = $2 . "; " . substr($1, 2, 1)
        if $expr_dec =~ /^(der|die|das) (\w+)$/;
    
    return Encode::encode_utf8($expr_dec);
}

1;

__END__