Hailo::Engine::Default - The default engine backend for L<Hailo|Hailo>


Hailo documentation Contained in the Hailo distribution.

Index


Code Index:

NAME

Top

Hailo::Engine::Default - The default engine backend for Hailo

DESCRIPTION

Top

This backend implements the logic of replying to and learning from input using the resources given to the engine roles.

It generates the reply in one go, while favoring some of the tokens in the input, and returns it. It is fast and the replies are decent, but you can get better replies (at the cost of speed) with the Scored engine.

AUTHORS

Top

Hinrik Örn Sigurðsson, hinrik.sig@gmail.com

Ævar Arnfjörð Bjarmason <avar@cpan.org>

LICENSE AND COPYRIGHT

Top


Hailo documentation Contained in the Hailo distribution.

package Hailo::Engine::Default;
BEGIN {
  $Hailo::Engine::Default::AUTHORITY = 'cpan:AVAR';
}
BEGIN {
  $Hailo::Engine::Default::VERSION = '0.69';
}

use 5.010;
use Any::Moose;
use List::Util qw<min first shuffle>;
use List::MoreUtils qw<uniq>;

with qw[ Hailo::Role::Arguments Hailo::Role::Engine ];

has repeat_limit => (
    isa     => 'Int',
    is      => 'rw',
    lazy    => 1,
    default => sub {
        my ($self) = @_;
        my $order = $self->order;
        return min(($order * 10), 50);
    }
);

sub BUILD {
    my ($self) = @_;

    # This performance hack is here because in our tight loops calling
    # $self->storage->sth->{...} is actually a significant part of the
    # overall program execution time since we're doing two method
    # calls and hash dereferences for each call to the database.

    my $sth = $self->storage->sth;
    while (my ($k, $v) = each %$sth) {
        $self->{"_sth_$k"} = $v;
    }

    return;
}

## no critic (Subroutines::ProhibitExcessComplexity)
sub reply {
    my $self = shift;
    my $tokens = shift // [];

    # we will favor these tokens when making the reply. Shuffle them
    # and discard half.
    my @key_tokens = do {
        my $i = 0;
        grep { $i++ % 2 == 0 } shuffle(@$tokens);
    };

    my $token_cache = $self->_resolve_input_tokens($tokens);
    my @key_ids = keys %$token_cache;

    # sort the rest by rareness
    @key_ids = $self->_find_rare_tokens(\@key_ids, 2);

    # get the middle expression
    my $pivot_token_id = shift @key_ids;
    my ($pivot_expr_id, @token_ids) = $self->_random_expr($pivot_token_id);
    return unless defined $pivot_expr_id; # we don't know any expressions yet

    # remove key tokens we're already using
    @key_ids = grep { my $used = $_; !first { $_ == $used } @token_ids } @key_ids;

    my %expr_cache;

    # construct the end of the reply
    $self->_construct_reply('next', $pivot_expr_id, \@token_ids, \%expr_cache, \@key_ids);

    # construct the beginning of the reply
    $self->_construct_reply('prev', $pivot_expr_id, \@token_ids, \%expr_cache, \@key_ids);

    # translate token ids to token spacing/text
    my @output = map {
        $token_cache->{$_} // ($token_cache->{$_} = $self->_token_info($_))
    } @token_ids;
    return \@output;
}

sub _resolve_input_tokens {
    my ($self, $tokens) = @_;
    my %token_cache;

    if (@$tokens == 1) {
        my ($spacing, $text) = @{ $tokens->[0] };
        my $token_info = $self->_token_resolve($spacing, $text);

        if (defined $token_info) {
            my ($id, $count) = @$token_info;
            $token_cache{$id} = [$spacing, $text, $count];
        }
        else {
            # when there's just one token, it could be ';' for example,
            # which will have normal spacing when it appears alone, but
            # suffix spacing in a sentence like "those things; foo, bar",
            # so we'll be a bit more lax here by also looking for any
            # token that has the same text
            $token_info = $self->_token_similar($text);
            if (defined $token_info) {
                my ($id, $spacing, $count) = @$token_info;
                $token_cache{$id} = [$spacing, $text, $count];
            }
        }
    }
    else {
        for my $token (@$tokens) {
            my ($spacing, $text) = @$token;
            my $token_info = $self->_token_resolve($spacing, $text);
            next if !defined $token_info;
            my ($id, $count) = @$token_info;
            $token_cache{$id} = [$spacing, $text, $count];
        }
    }

    return \%token_cache;
}

sub _token_resolve {
    my ($self, $spacing, $text) = @_;

    $self->{_sth_token_resolve}->execute($spacing, $text);
    return $self->{_sth_token_resolve}->fetchrow_arrayref;
}

sub _token_info {
    my ($self, $id) = @_;

    $self->{_sth_token_info}->execute($id);
    my @res = $self->{_sth_token_info}->fetchrow_array;
    return \@res;
}

sub learn {
    my ($self, $tokens) = @_;
    my $order = $self->order;

    # only learn from inputs which are long enough
    return if @$tokens < $order;

    my (%token_cache, %expr_cache);

    # resolve/add tokens and update their counter
    for my $token (@$tokens) {
        my $key = join '', @$token; # the key is "$spacing$text"
        if (!exists $token_cache{$key}) {
            $token_cache{$key} = $self->_token_id_add($token);
        }
        $self->{_sth_inc_token_count}->execute(1, $token_cache{$key});
    }

    # process every expression of length $order
    for my $i (0 .. @$tokens - $order) {
        my @expr = map { $token_cache{ join('', @{ $tokens->[$_] }) } } $i .. $i+$order-1;
        my $key = join('_', @expr);

        if (!defined $expr_cache{$key}) {
            $expr_cache{$key} = $self->_expr_id_add(\@expr);
        }
        my $expr_id = $expr_cache{$key};

        # add link to next token for this expression, if any
        if ($i < @$tokens - $order) {
            my $next_id = $token_cache{ join('', @{ $tokens->[$i+$order] }) };
            $self->_inc_link('next_token', $expr_id, $next_id, 1);
        }

        # add link to previous token for this expression, if any
        if ($i > 0) {
            my $prev_id = $token_cache{ join('', @{ $tokens->[$i-1] }) };
            $self->_inc_link('prev_token', $expr_id, $prev_id, 1);
        }

        # add links to boundary token if appropriate
        my $b = $self->storage->_boundary_token_id;
        $self->_inc_link('prev_token', $expr_id, $b, 1) if $i == 0;
        $self->_inc_link('next_token', $expr_id, $b, 1) if $i == @$tokens-$order;
    }

    return;
}

sub learn_cached {
    my ($self, $tokens) = @_;
    my $order = $self->order;

    # only learn from inputs which are long enough
    return if @$tokens < $order;

    my (%token_cache, %expr_cache);

    # resolve/add tokens and update their counter
    for my $token (@$tokens) {
        my $key = join '', @$token; # the key is "$spacing$text"
        if (!exists $token_cache{$key}) {
            my $token_id = $self->_token_id_add($token);
            $token_cache{$key} = $token_id;
            $self->{_updates}{token_count}{$token_id}++;
        }
    }

    # process every expression of length $order
    for my $i (0 .. @$tokens - $order) {
        my @expr = map { $token_cache{ join('', @{ $tokens->[$_] }) } } $i .. $i+$order-1;
        my $key = join('_', @expr);

        if (!defined $expr_cache{$key}) {
            $expr_cache{$key} = $self->_expr_id_add(\@expr);
        }
        my $expr_id = $expr_cache{$key};

        # add link to next token for this expression, if any
        if ($i < @$tokens - $order) {
            my $next_id = $token_cache{ join('', @{ $tokens->[$i+$order] }) };
            $self->{_updates}{next_token}{$expr_id}{$next_id}++;
        }

        # add link to previous token for this expression, if any
        if ($i > 0) {
            my $prev_id = $token_cache{ join('', @{ $tokens->[$i-1] }) };
            $self->{_updates}{prev_token}{$expr_id}{$prev_id}++;
        }

        # add links to boundary token if appropriate
        my $b = $self->storage->_boundary_token_id;
        $self->{_updates}{prev_token}{$expr_id}{$b}++ if $i == 0;
        $self->{_updates}{next_token}{$expr_id}{$b}++ if $i == @$tokens-$order;
    }

    return;
}

sub flush_cache {
    my ($self) = @_;

    my $updates = $self->{_updates};
    return if !$updates;

    while (my ($token_id, $count) = each %{ $updates->{token_count} }) {
        $self->{_sth_inc_token_count}->execute($count, $token_id);
    }

    while (my ($expr_id, $links) = each %{ $updates->{next_token} }) {
        while (my ($next_token_id, $count) = each %$links) {
            $self->_inc_link('next_token', $expr_id, $next_token_id, $count);
        }
    }

    while (my ($expr_id, $links) = each %{ $updates->{prev_token} }) {
        while (my ($prev_token_id, $count) = each %$links) {
            $self->_inc_link('prev_token', $expr_id, $prev_token_id, $count);
        }
    }
}

# sort token ids based on how rare they are
sub _find_rare_tokens {
    my ($self, $token_ids, $min) = @_;
    return unless @$token_ids;

    my %links;
    for my $id (@$token_ids) {
        next if exists $links{$id};
        $self->{_sth_token_count}->execute($id);
        $links{$id} = $self->{_sth_token_count}->fetchrow_array;
    }

    # remove tokens which are too rare
    my @ids = grep { $links{$_} >= $min } @$token_ids;

    @ids = sort { $links{$a} <=> $links{$b} } @ids;

    return @ids;
}

# increase the link weight between an expression and a token
sub _inc_link {
    my ($self, $type, $expr_id, $token_id, $count) = @_;

    $self->{"_sth_${type}_inc"}->execute($count, $expr_id, $token_id);
    if (!$self->{"_sth_${type}_inc"}->rows) {
        $self->{"_sth_${type}_add"}->execute($expr_id, $token_id, $count);
    }

    return;
}

# look up/add an expression id based on tokens
sub _expr_id_add {
    my ($self, $token_ids) = @_;

    $self->{_sth_expr_id}->execute(@$token_ids);
    my $expr_id = $self->{_sth_expr_id}->fetchrow_array();
    return $expr_id if defined $expr_id;

    $self->{_sth_add_expr}->execute(@$token_ids);
    return $self->storage->dbh->last_insert_id(undef, undef, "expr", undef);
}

# return token id if the token exists
sub _token_id {
    my ($self, $token_info) = @_;

    $self->{_sth_token_id}->execute(@$token_info);
    my $token_id = $self->{_sth_token_id}->fetchrow_array();

    return unless defined $token_id;
    return $token_id;
}

# get token id (adding the token if it doesn't exist)
sub _token_id_add {
    my ($self, $token_info) = @_;

    my $token_id = $self->_token_id($token_info);
    $token_id = $self->_add_token($token_info) unless defined $token_id;
    return $token_id;
}

# return all tokens (regardless of spacing) that consist of this text
sub _token_similar {
    my ($self, $token_text) = @_;
    $self->{_sth_token_similar}->execute($token_text);
    return $self->{_sth_token_similar}->fetchrow_arrayref;
}

# add a new token and return its id
sub _add_token {
    my ($self, $token_info) = @_;
    $self->{_sth_add_token}->execute(@$token_info);
    return $self->storage->dbh->last_insert_id(undef, undef, "token", undef);
}

# return a random expression containing the given token
sub _random_expr {
    my ($self, $token_id) = @_;

    my $expr;

    if (!defined $token_id) {
        $self->{_sth_random_expr}->execute();
        $expr = $self->{_sth_random_expr}->fetchrow_arrayref();
    }
    else {
        # try the positions in a random order
        for my $pos (shuffle 0 .. $self->order-1) {
            my $column = "token${pos}_id";

            # get a random expression which includes the token at this position
            $self->{"_sth_expr_by_$column"}->execute($token_id);
            $expr = $self->{"_sth_expr_by_$column"}->fetchrow_arrayref();
            last if defined $expr;
        }
    }

    return unless defined $expr;
    return @$expr;
}

# return a new next/previous token
sub _pos_token {
    my ($self, $pos, $expr_id, $key_tokens) = @_;

    $self->{"_sth_${pos}_token_get"}->execute($expr_id);
    my $pos_tokens = $self->{"_sth_${pos}_token_get"}->fetchall_arrayref();

    if (defined $key_tokens) {
        for my $i (0 .. $#{ $key_tokens }) {
            my $want_id = $key_tokens->[$i];
            my @ids     = map { $_->[0] } @$pos_tokens;
            my $has_id  = grep { $_ == $want_id } @ids;
            next unless $has_id;
            return splice @$key_tokens, $i, 1;
        }
    }

    my @novel_tokens;
    for my $token (@$pos_tokens) {
        push @novel_tokens, ($token->[0]) x $token->[1];
    }
    return $novel_tokens[rand @novel_tokens];
}

sub _construct_reply {
    my ($self, $what, $expr_id, $token_ids, $expr_cache, $key_ids) = @_;
    my $order          = $self->order;
    my $repeat_limit   = $self->repeat_limit;
    my $boundary_token = $self->storage->_boundary_token_id;

    my $i = 0;
    while (1) {
        if (($i % $order) == 0 and
            (($i >= $repeat_limit * 3) ||
             ($i >= $repeat_limit and uniq(@$token_ids) <= $order))) {
            last;
        }

        my $id = $self->_pos_token($what, $expr_id, $key_ids);
        last if $id == $boundary_token;

        my @ids;
        given ($what) {
            when ('next') {
                push @$token_ids, $id;
                @ids = @$token_ids[-$order..-1];
            }
            when ('prev') {
                unshift @$token_ids, $id;
                @ids = @$token_ids[0..$order-1];
            }
        }

        my $key = join '_', @ids;
        if (!defined $expr_cache->{$key}) {
            $expr_cache->{$key} = $self->_expr_id_add(\@ids);
        }
        $expr_id = $expr_cache->{$key};
    } continue {
        $i++;
    }

    return;
}

__PACKAGE__->meta->make_immutable;