/usr/local/CPAN/Lingua-Slavic-Numbers/Lingua/Slavic/Numbers.pm
package Lingua::Slavic::Numbers;
use strict;
use Carp qw(carp);
use List::Util qw(max);
use Data::Dumper;
use Regexp::Common qw /number/;
use Exporter;
use utf8;
use vars qw( $VERSION $DEBUG @ISA @EXPORT_OK @EXPORT);
use vars qw(
%INFLEXIONS
%NUMBER_NAMES
%ORDINALS
$OUTPUT_DECIMAL_DELIMITER
$MINUS
);
use constant LANG_BG => 'bg';
use constant NO_CONJUNCTIONS => 'noconj';
use constant FEMININE_GENDER => 'fem';
use constant MASCULINE_GENDER => 'man';
use constant NEUTRAL_GENDER => 'neu';
$VERSION = 0.03;
$DEBUG = 0;
@ISA = qw(Exporter);
@EXPORT_OK = qw( &number_to_slavic &ordinate_to_slavic LANG_BG);
@EXPORT = @EXPORT_OK;
$MINUS = ('минÑÑ');
$OUTPUT_DECIMAL_DELIMITER = ('ÑÑло');
%INFLEXIONS =
(
LANG_BG,
{
FEMININE_GENDER,
{
1 => 'една',
},
MASCULINE_GENDER,
{
1 => 'един',
2 => 'два',
},
}
);
%NUMBER_NAMES =
(
LANG_BG,
{
0 => 'нÑла',
1 => 'едно',
2 => 'две',
3 => 'ÑÑи',
4 => 'ÑеÑиÑи',
5 => 'пеÑ',
6 => 'ÑеÑÑ',
7 => 'Ñедем',
8 => 'оÑем',
9 => 'девеÑ',
10 => 'деÑеÑ',
11 => 'едина{10}',
12 => 'двана{10}',
13 => '{3}на{10}',
14 => '{4}на{10}',
15 => '{5}на{10}',
16 => '{6}на{10}',
17 => '{7}на{10}',
18 => '{8}на{10}',
19 => '{9}на{10}',
20 => 'два{10}',
30 => '{3}{10}',
40 => '{4}{10}',
50 => '{5}{10}',
60 => '{6}{10}',
70 => '{7}{10}',
80 => '{8}{10}',
90 => '{9}{10}',
100 => 'ÑÑо',
200 => '{2}ÑÑа',
300 => '{3}ÑÑа',
'1e3' => 'Ñ
илÑда',
'1e4' => '{10} Ñ
илÑди',
'1e5' => '{100} Ñ
илÑди',
'1e6' => 'милион',
'1e7' => '{10} {1e6}а',
'1e8' => '{100} {1e6}а',
'1e9' => 'милиаÑд', # USA English 'billion'
'1e10' => '{10} {1e9}а',
'1e11' => '{100} {1e9}а',
'1e12' => 'ÑÑилион', # sometimes 'билион' in older usage
'1e13' => '{10} {1e12}а',
'1e14' => '{100} {1e12}а',
'1e15' => 'квадÑилион',
'1e16' => '{10} {1e15}а',
'1e17' => '{100} {1e15}а',
'1e18' => 'квинÑилион',
'1e19' => '{10} {1e18}а',
'1e20' => '{100} {1e18}а',
'1e21' => 'ÑекÑÑилион',
'1e22' => '{10} {1e21}а',
}
);
$NUMBER_NAMES{LANG_BG()}->{"${_}00"} = "{$_}ÑÑоÑин" foreach qw/4 5 6 7 8 9/;
$NUMBER_NAMES{LANG_BG()}->{'1' . '0'x(3*$_)} = $NUMBER_NAMES{LANG_BG()}->{'1e'. 3*$_} foreach 1..7;
# use Data::Dumper;
# print Dumper \%NUMBER_NAMES;
my $count = 1;
%ORDINALS =
(
LANG_BG,
{
# given in male singular formal version only, inflection TODO. Nothing above 99 yet.
0 => 'нÑлев',
1 => 'пÑÑви',
2 => 'вÑоÑи',
3 => 'ÑÑеÑи',
4 => 'ÑеÑвÑÑÑи',
5 => '{5}и',
6 => '{6}и',
7 => 'Ñедми',
8 => 'оÑми',
9 => '{9}и',
10 => '{10}и',
11 => 'едина[10]',
12 => 'двана[10]',
13 => '{3}на[10]',
13 => '{3}на[10]',
14 => '{4}на[10]',
15 => '{5}на[10]',
16 => '{6}на[10]',
17 => '{7}на[10]',
18 => '{8}на[10]',
19 => '{9}на[10]',
20 => 'два[10]',
30 => '{3}[10]',
40 => '{4}[10]',
50 => '{5}[10]',
60 => '{6}[10]',
70 => '{7}[10]',
80 => '{8}[10]',
90 => '{9}[10]',
100 => '{100}Ñен',
1000 => 'Ñ
илÑден',
10e6 => '{1e6}ен',
}
);
foreach my $lang (keys %ORDINALS)
{
foreach my $val (values %{$ORDINALS{$lang}})
{
$val = interpolate_string($lang, $val);
}
}
foreach my $lang (keys %NUMBER_NAMES)
{
foreach my $val (values %{$NUMBER_NAMES{$lang}})
{
$val = interpolate_string($lang, $val);
}
}
sub deb { print @_ if $DEBUG }
sub ordinate_to_slavic
{
my $lang = shift;
my $number = shift;
my $options = shift @_ || {};
unless ( exists $ORDINALS{$lang} )
{
carp("Ordinates for language $lang are unknown, sorry");
return undef;
}
my $hash = $ORDINALS{$lang};
unless ( $number >= 0 )
{
carp("Ordinates must not be negative");
return undef;
}
unless ( int $number == $number )
{
carp("Ordinates can only be integers");
return undef;
}
return $hash->{$number} if exists $hash->{$number};
my $max = max(keys %$hash);
if ($number > $max)
{
carp("Ordinate $number is above maximum $max and not supported, sorry");
return undef;
}
if ($lang eq LANG_BG)
{
# we may have a partially expressible ordinate number, which in
# Bulgarian for a number of N digits is done with N-1 numbers (not
# ordinals) with no conjunctions, and an 'и' conjunction before the
# last one (N) as an ordinal. Effectively it turns out to be the
# number without the least significant digit, then 'и', then the
# ordinal of the least significant digit. The exceptions should be
# handled by $ORDINALS.
my $out = '';
my $bot = $number % 10;
my $top = $number - $bot;
return interpolate_string($lang, "{{$top}@{[NO_CONJUNCTIONS()]}} и [$bot]");
}
carp("The ordinate for $number in language '$lang' couldn't be found, sorry");
return undef;
}
sub bulgarian_triplets
{
my $lang = LANG_BG;
my $hash = shift;
my $tri = shift;
my $options = shift @_ || {};
my $pow = 0;
foreach my $t (@$tri) # this is a triplet
{
my $some_left = scalar @$tri > $pow/3; # true if we're not at end of @$tri yet
# convert to scientific notation
my $canon_power = $pow;
my $canon_t = $t;
if ($t =~ m/$RE{num}{real}{-sep=>'[,.]?'}{-keep}/)
{
$canon_power = $8 || 0;
$canon_t = $3;
}
else
{
while ($canon_t >= 10)
{
$canon_t /= 10;
$canon_power ++;
}
}
my $canon = "${canon_t}e$canon_power";
deb("Working on triplet $t (power $pow, canonical $canon)\n");
if (exists $hash->{$canon})
{
$t = $hash->{$canon};
}
elsif ($t == 0) # handle 0 and '000' strings
{
if (scalar @$tri == 1) # is the zero the only number?
{
$t = 0;
redo;
}
else
{
$t = ''; # don't do anything with uninteresting zeroes
}
}
else
{
# try decomposing $t
# get rid of scientific notation
$t =~ s/(\d+)e(\d+)/$1 . 0 x $2/e;
# first, set up the qualifier
deb("getting qualifier and gender for $t\n");
my $qualifier = '';
my $inflexion = '';
my $extra_а = '';
if ($pow)
{
$qualifier = number_to_slavic($lang, "1e$pow");
$inflexion = MASCULINE_GENDER; # all but thousands are masculine
$extra_а = 'а'; # and all have 'a' when plural (singular cases are caught by the %NUMBER_NAMES hash)
if ($pow eq 3) # thousands are a special case for gender, being feminine
{
$qualifier = 'Ñ
илÑди';
$inflexion = FEMININE_GENDER;
$extra_а = ''; # no extra 'a' for thousands
}
}
$qualifier .= $extra_а;
my @n = split //, $t;
shift @n while 0 == $n[0]; # remove the leading zeroes
deb("decomposing $t, result [@n]\n");
my @inter;
while (@n)
{
my $decompose_num = shift @n;
my $decompose_pow = scalar @n;
# grab the next digit for numbers 10 .. 20
if (($decompose_num == 1 && scalar @n == 1) ||
($decompose_num == 2 && scalar @n == 1 && $n[0] == 0))
{
$decompose_num .= shift @n;
$decompose_pow = 0;
}
next unless $decompose_num; # skip zeroes
my $extra_и = '';
# numbers below 21 are one word, so in cases like 1001 (Ñ
илÑда и едно) a conjunction is needed
# ditto for 100..900
if (
# $some_left tells us there are more triplets to come
$some_left &&
(
($decompose_num <= 20 && scalar @n == 0) || # 1..20
(scalar @n == 2 && $n[0] == 0 && $n[1] == 0) # N00
)
)
{
$extra_и = ' ';
}
push @inter, sprintf("%s{%s%s}", $extra_и, $decompose_num, '0'x$decompose_pow);
}
my @inter_options = (NO_CONJUNCTIONS);
push @inter_options, $inflexion if $inflexion;
my $inter_options = join ':', @inter_options;
$inter[-1] =~ s/({.*})/{$1$inter_options}/;
my $inter = join(' ', @inter);
deb("bulgarian_triplets calling interpolate_string with [$inter]\n");
$inter = interpolate_string($lang, $inter);
if (defined $inter)
{
$t = $inter;
# add the final conjunction if requested
$t =~ s/\s(\w+)$/ и $1/ unless $options->{NO_CONJUNCTIONS()};
$t .= " ${qualifier}" if $qualifier; # add the qualifier
$t =~ s/^\s+//g; # replace leading/ending spaces
$t =~ s/\s+$//g; # replace leading/ending spaces
}
else
{
carp "Couldn't convert $canon";
}
}
$pow+=3;
}
@$tri = reverse @$tri;
return "@$tri";
}
sub find_known
{
my $lang = shift;
my $hash = shift;
my $number = shift;
my $options = shift @_ || {};
foreach my $gender (FEMININE_GENDER(), MASCULINE_GENDER())
{
return $INFLEXIONS{$lang}->{$gender}->{$number}
if (exists $options->{$gender} &&
exists $INFLEXIONS{$lang}->{$gender}->{$number});
}
return $hash->{$number} if exists $hash->{$number};
return undef;
}
sub number_to_slavic
{
my $lang = shift;
my $number = shift;
my $options = shift @_ || {};
# carp("Language $lang, number $number");
if ($number !~ m/^$RE{num}{int}$/ && $number !~ m/^$RE{num}{real}$/)
{
carp("Number $number doesn't appear to be a real number, sorry");
return undef;
}
$number =~ s/\+//g;
unless ( exists $NUMBER_NAMES{$lang} )
{
carp("Numbers for language $lang are unknown, sorry");
return undef;
}
my $hash = $NUMBER_NAMES{$lang};
my $max = max(keys %$hash);
if ($number > $max)
{
carp("Number $number is above maximum $max and not supported, sorry");
return undef;
}
return find_known($lang, $hash, $number, $options) if defined find_known($lang, $hash, $number, $options);
return "$MINUS " . number_to_slavic($lang, $1) if $number =~ m/-\s*(.*)/;
# normalize to scientific notation if exponent is specified, then expand
if ($number =~ m/$RE{num}{real}{-sep=>'[,.]?'}{-keep}/)
{
my $power = $8;
my $num = $3;
if ($power)
{
while ($num >= 10)
{
$num /= 10;
$power++;
}
return find_known($lang, $hash, $number, $options) if defined find_known($lang, $hash, $number, $options);
while ($num && int $num != $num)
{
$num *= 10;
$power--;
}
$number = $num . '0' x $power;
return find_known($lang, $hash, $number, $options) if defined find_known($lang, $hash, $number, $options);
deb("finally, got power $power and number $num => $number\n");
}
}
if (LANG_BG eq $lang)
{
# build the intepretation from the number's digits
my @components;
my @parts = split /[.,]/, $number, 2;
$parts[1] ||= ''; # always provide a floating part if it doesn't come with the number
my $n = $parts[0];
my @n;
while ($n)
{
my $old_n = $n;
my $triplet = substr $n, -3, 3, '';
deb("grabbing triplet from $old_n resulting in $n and $triplet\n");
push @n, $triplet;
}
my $out = bulgarian_triplets($hash, \@n, $options);
# clean spaces
$out =~ s/^\s*//;
$out =~ s/\s*$//;
$out =~ s/\s+/ /g;
# fix annoying bugs
# remove leading и
$out =~ s/^и\s+//g;
# fix една Ñ
илÑди
$out =~ s/^една Ñ
илÑди/Ñ
илÑда/;
return $out;
}
carp("The number representation of $number in language '$lang' couldn't be found, sorry");
my $opt_string = join '//', sort keys %$options;
$opt_string = "//$opt_string" if $opt_string;
return "$number$opt_string";
}
#
# OO Methods
#
sub new {
my $class = shift;
my $number = shift;
my $lang = shift;
bless { num => $number, lang => $lang}, $class;
}
sub parse {
my $self = shift;
if ( $_[0] )
{
$self->{num} = shift;
}
if ( $_[1] )
{
$self->{lang} = shift;
}
$self;
}
sub get_string
{
my $self = shift;
return number_to_slavic($self->{lang}, $self->{num});
}
sub get_ordinate
{
my $self = shift;
return ordinate_to_slavic($self->{lang}, $self->{num});
}
### cperl-mode doesn't like this, so I put it at the end
sub interpolate_string
{
my $lang = shift;
my $data = shift;
while ($data =~ m/\[$RE{num}{real}{-sep=>'[,.]?'}\]+/ || # [number]
$data =~ m/{$RE{num}{real}{-sep=>'[,.]?'}}+/) # {number}
{
$data =~ s/{
{
$RE{num}{dec}{-sep=>'[,.]?'}{-keep}
}
([:\w]+)?
}
/
number_to_slavic($lang,
$1,
{ map { $_ => 1 } split(':', $11) }
)
/giex;
$data =~ s/
{
$RE{num}{dec}{-sep=>'[,.]?'}{-keep}
}
/number_to_slavic($lang, $1)/giex;
$data =~ s/
\[
\[
$RE{num}{real}{-sep=>'[,.]?'}{-keep}
\]
([:\w]+)?
\]
/
ordinate_to_slavic(
$lang,
$1,
{ map { $_ => 1 } split(':', $2) }
)
/giex;
$data =~ s/
\[
$RE{num}{real}{-sep=>'[,.]?'}{-keep}
\]
/ordinate_to_slavic($lang, $1)/giex;
}
return $data;
}
1;
__END__
=pod
=head1 NAME
Lingua::Slavic::Numbers - Converts numeric values into their Slavic
string equivalents. Bulgarian is supported so far.
=head1 SYNOPSIS
# Procedural Style
use Lingua::Slavic::Numbers qw(number_to_slavic ordinate_to_slavic);
print number_to_slavic('bg', 345 );
my $twenty = ordinate_to_slavic('bg', 20 );
print "Ordinate of 20 is $twenty";
# OO Style
use Lingua::Slavic::Numbers;
# specifies default language
my $number = Lingua::Slavic::Numbers->new( 123, Lingua:Slavic::Numbers::LANG_BG );
print $number->get_string;
print $number->get_ordinate;
# override language
print $number->get_string(Lingua:Slavic::Numbers::LANG_BG);
print $number->get_ordinate(Lingua:Slavic::Numbers::LANG_BG);
# default language, no number
my $other_number = Lingua::Slavic::Numbers->new(Lingua:Slavic::Numbers::LANG_BG);
$other_number->parse( 7340 );
$bg_string = $other_number->get_string;
=head1 DESCRIPTION
This module converts a number into a Slavic-language cardinal or
ordinal. Bulgarian is supported so far.
The interface tries to conform to the one defined in Lingua::EN::Number,
though this module does not provide any parse() method. Also,
unlike Lingua::En::Numbers, you can use this module in a procedural
manner by importing the number_to_LL() function (LL=bg so far).
If you plan to use this module with greater numbers (>10e20), you can use
the Math::BigInt module:
use Math::BigInt;
use Lingua::Slavic::Numbers qw( number_to_slavic );
my $big_num = new Math::BigInt '1.23e68';
print number_to_slavic('bg', $big_num);
=head1 FUNCTION-ORIENTED INTERFACE
=head2 number_to_slavic( $lang, $number )
use Lingua::Slavic::Numbers qw(number_to_slavic);
my $depth = number_to_slavic('bg', 20_000 );
my $year = number_to_slavic('bg', 1870 );
# in honor of Lingua::FR::Numbers, which I copied to start this
# module, I'm using a French example
print "ÐÑл ÐеÑн напиÑа ,,$depth левги под моÑеÑо'' в $year.";
This function can be exported by the module.
=head2 ordinate_to_slavic( $lang, $number )
use Lingua::Slavic::Numbers qw(ordinate_to_slavic);
my $twenty = ordinate_to_slavic('bg', 20 );
print "ÐÐ¾Ð¼ÐµÑ $twenty";
This function can be exported by the module.
=head1 CONSTANTS
Bulgarian: Lingua:Slavic::Numbers::LANG_BG ('bg')
=head1 SOURCE
Lingua::FR::Numbers for the code
=head1 BUGS
Though the module should be able to convert big numbers (up to 10**36),
I do not know how Perl handles them.
Please report any bugs or comments using the Request Tracker interface:
https://rt.cpan.org/NoAuth/Bugs.html?Dist=Lingua-Slavic-Numbers
=head1 COPYRIGHT
Copyright 2008, Ted Zlatanov (Ð¢ÐµÐ¾Ð´Ð¾Ñ ÐлаÑанов). All Rights
Reserved. This module can be redistributed under the same terms as
Perl itself.
=head1 AUTHOR
Ted Zlatanov <tzz@lifelogs.com>
=head1 SEE ALSO
Lingua::EN::Numbers, Lingua::Word2Num