Lingua::EN::Numbers - turn "407" into "four hundred and seven", etc.


Lingua-EN-Numbers documentation Contained in the Lingua-EN-Numbers distribution.

Index


Code Index:

NAME

Top

Lingua::EN::Numbers - turn "407" into "four hundred and seven", etc.

SYNOPSIS

Top

  use Lingua::EN::Numbers qw(num2en num2en_ordinal);

  my $x = 234;
  my $y = 54;
  print "You have ", num2en($x), " things to do today!\n";
  print "You will stop caring after the ", num2en_ordinal($y), ".\n";

prints:

  You have two hundred and thirty-four things to do today!
  You will stop caring after the fifty-fourth.

DESCRIPTION

Top

Lingua::EN::Numbers turns numbers into English text. It exports (upon request) two functions, num2en and num2en_ordinal. Each takes a scalar value and returns a scalar value. The return value is the English text expressing that number; or if what you provided wasn't a number, then they return undef.

This module can handle integers like "12" or "-3" and real numbers like "53.19".

This module also understands exponential notation -- it turns "4E9" into "four times ten to the ninth"). And it even turns "INF", "-INF", "NaN" into "infinity", "negative infinity", and "not a number", respectively.

Any commas in the input numbers are ignored.

LEGACY INTERFACE

Top

For some amount of backward compatability with the old (before 1.01) version of this module, the old OO interface is supported, where you can construct a number object with new([optionalvalue]), change its value with parse(value), and get its Engish expression with get_string().

The rest of the old version's interface (like $Lingua::EN::Numbers::MODE and the other $Lingua::EN::Numbers::ALLCAPS variables) are no longer supported. This is not a great loss, as much of that stuff never worked right in the first place.

SEE ALSO

Top

Lingua::EN::WordsToNumbers

Lingua::EN::Numbers::Ordinate

Lingua::EN::Numbers::Years

COPYRIGHT

Top

AUTHOR

Top

Sean M. Burke, sburke@cpan.org

(The author of the 0.01 version was Stephen Pandich.)


Lingua-EN-Numbers documentation Contained in the Lingua-EN-Numbers distribution.

require 5.004;  # Time-stamp: "2005-01-05 17:12:51 AST"
package Lingua::EN::Numbers;

require Exporter;
@ISA = qw(Exporter);

use strict;
BEGIN { *DEBUG = sub () {0} unless defined &DEBUG } # setup a DEBUG constant
use vars qw(
 @EXPORT @EXPORT_OK $VERSION
 $MODE $TRUE $FALSE
 %D %Card2ord %Mult
);
$VERSION = '1.01';
@EXPORT    = ();
@EXPORT_OK = qw( num2en num2en_ordinal );

#--------------------------------------------------------------------------
# Here begins the old junk, including mock-object stuff:

$TRUE = 1; $FALSE = 0; $MODE = "American";
sub import { 
  @_ = grep { $_ ne 'American' and $_ ne 'British' } @_;
  goto &Exporter::import;
}

sub new {
  my $class = shift;
  my $obj = bless [undef],  ref($class) || $class;
  $obj->parse($_[0]);
  return $obj;
}

sub parse { $_[0][0] = $_[1]; return 1; }
sub get_string { num2en( $_[0][0] ); }

# End of legacy stuff.
#--------------------------------------------------------------------------

@D{0 .. 20, 30,40,50,60,70,80,90} = qw|
 zero
 one two three four five six seven eight nine ten
 eleven twelve thirteen fourteen fifteen
 sixteen seventeen eighteen nineteen
 twenty thirty forty fifty sixty seventy eighty ninety
|;


@Card2ord{  qw| one   two    three five  eight  nine  twelve  |}
 =          qw| first second third fifth eighth ninth twelfth |;


{
  my $c = 0;
  for ( '', qw<
    thousand million billion trillion quadrillion quintillion sextillion
    septillion octillion nonillion
  > ) {
    $Mult{$c} = $_;
    $c++;
  }
}

#==========================================================================

sub num2en_ordinal {
   #  Cardinals are [one two three...]
   #  Ordinals  are [first second third...]
  
  return undef unless defined $_[0] and length $_[0];
  my($x) = $_[0];
  
  $x = num2en($x);
  return $x unless $x;
  $x =~ s/(\w+)$//s   or return $x . "th";
  my $last = $1;

  $last =
   $Card2ord{$last} || ( $last =~ s/y$/ieth/ && $last ) || ( $last . "th" );

  return "$x$last";
}

#==========================================================================

sub num2en {
  my $x = $_[0];
  return undef unless defined $x and length $x;

  return 'not-a-number'      if $x eq 'NaN';
  return 'positive infinity' if $x =~ m/^\+inf(?:inity)?$/si;
  return 'negative infinity' if $x =~ m/^\-inf(?:inity)?$/si;
  return          'infinity' if $x =~  m/^inf(?:inity)?$/si;

  return $D{$x} if exists $D{$x};  # the most common cases

  # Make sure it's not in scientific notation:
  {  my $e = _e2en($x);  return $e if defined $e; }
  
  my $orig = $x;

  $x =~ s/,//g; # nix any commas

  my $sign;
  $sign = $1 if $x =~ s/^([-+])//s;
  
  my($int, $fract);
  if(    $x =~ m<^\d+$>          ) { $int = $x }
  elsif( $x =~ m<^(\d+)\.(\d+)$> ) { $int = $1; $fract = $2 }
  elsif( $x =~ m<^\.(\d+)$>      ) { $fract = $1 }
  else {
    DEBUG and print "Not a number: \"orig\"\n";
    return undef;
  }
  
  DEBUG and printf " Working on Sign[%s]  Int2en[%s]  Fract[%s]  < \"%s\"\n",
   map defined($_) ? $_ : "nil", $sign, $int, $fract, $orig;
  
  return join ' ', grep defined($_) && length($_),
    _sign2en($sign),
    _int2en($int),
    _fract2en($fract),
  ;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

sub _sign2en {
  return undef unless defined $_[0] and length $_[0];  
  return 'negative' if $_[0] eq '-';
  return 'positive' if $_[0] eq '+';
  return "WHAT_IS_$_[0]";
}

sub _fract2en {    # "1234" => "point one two three four"
  return undef unless defined $_[0] and length $_[0];  
  my $x = $_[0];
  return join ' ', 'point', map $D{$_}, split '', $x;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# The real work:

sub _int2en {
  return undef unless defined $_[0] and length $_[0]
   and $_[0] =~ m/^\d+$/s;

  my($x) = $_[0];

  return $D{$x} if defined $D{$x};  # most common/irreg cases
  
  if( $x =~ m/^(.)(.)$/ ) {
    return  $D{$1 . '0'} . '-' . $D{$2};
     # like    forty        -     two
      # note that neither bit can be zero at this point
     
  } elsif( $x =~ m/^(.)(..)$/ ) {
    my($h, $rest) = ("$D{$1} hundred", $2);
    return $h if $rest eq '00';
    return "$h and " . _int2en(0 + $rest);
  } else {
    return _bigint2en($x);
  }
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

sub _bigint2en {
  return undef unless defined $_[0] and length $_[0]
   and $_[0] =~ m/^\d+$/s;

  my($x) = $_[0];

  my @chunks;  # each:  [ string, exponent ]
  
  {
    my $groupnum = 0;
    my $num;
    while( $x =~ s<(\d{1,3})$><>s ) { # pull at most three digits from the end
      $num = $1 + 0;
      unshift @chunks, [ $num, $groupnum ] if $num;
      ++$groupnum;
    }
    return $D{'0'} unless @chunks;  # rare but possible
  }
  
  my $and;
  
  $and = 'and' if  $chunks[-1][1] == 0  and  $chunks[-1][0] < 100;
   # The special 'and' that shows up in like "one thousand and eight"
   # and "two billion and fifteen", but not "one thousand [*and] five hundred"
   # or "one million, [*and] nine"

  _chunks2en( \@chunks );

  $chunks[-2] .= " and" if $and and @chunks > 1;
  return "$chunks[0] $chunks[1]" if @chunks == 2;
   # Avoid having a comma if just two units
  return join ", ", @chunks;
}


sub _chunks2en {
  my $chunks = $_[0];
  return unless @$chunks;
  my @out;
  foreach my $c (@$chunks) {
    push @out,   $c = _groupify( _int2en( $c->[0] ),  $c->[1] )  if $c->[0];
  }
  @$chunks = @out;
  return;
}

sub _groupify {
  # turn ("seventeen", 3) => "seventeen billion"
  my($basic, $multnum) = @_;
  return  $basic unless $multnum;  # the first group is unitless
  DEBUG > 2 and print "  Groupifying $basic x $multnum mults\n";
  return "$basic $Mult{$multnum}"  if  $Mult{$multnum};
   # Otherwise it must be huuuuuge, so fake it with scientific notation
  return "$basic " . "times ten to the " . num2en_ordinal($basic * 3);
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#
# Because I can never remember this:
#
#  3.1E8
#  ^^^   is called the "mantissa"
#      ^ is called the "exponent"
#         (the implicit "10" is the "base" a/k/a "radix")

sub _e2en {
  my $x = $_[0];

  my($m, $e);
  if( $x =~
    m<
            ^(
                [-+]?  # leading sign
                (?:
                    [\d,]+  |  [\d,]*\.\d+  # number
                )
              )
            [eE]
            (-?\d+)   # mantissa, has to be an integer
            $
        >x
  ) {
    ($m, $e) = ($1, $2);
    DEBUG and print "  Scientific notation: [$x] => $m E $e\n";
    $e += 0;
    return num2en($m) . ' times ten to the ' . num2en_ordinal($e);
  } else {
    DEBUG and print "  Okay, $x isn't in exponential notation\n";
    return undef;
  }
}

#==========================================================================
1;

__END__