No::Sort - Norwegian sorting


Norge documentation Contained in the Norge distribution.

Index


Code Index:

NAME

Top

No::Sort - Norwegian sorting

SYNOPSIS

Top

  use No::Sort;
  @sortert = no_sort @norske_ord;

DESCRIPTION

Top

This module provde the function no_sort() which sort a ISO-8859/1 encoded string according to Norwegian practice. The routine works like the normal perl sort routine, but the optional first argument is special. It can either be a reference to the strxfrm() function to use while sorting or a reference to a hash used to transform the words while sorting.

You can also import the no_xfrm() function which is used for standard sorting. It can be useful to base your custom transformation function on it. If we for instance would like to sort "Aa" as "Å" we could implement it like this:

  use No::Sort qw(no_sort no_xfrm);
  sub my_xfrm {
      my $word = shift;
      $word =~ s/A[aA]/Å/g;
      $word =~ s/aa/å/g;
      no_xfrm($word);
  }
  @sorted = no_sort \&my_xfrm, @names;

By the way, the my_xfrm shown in this example can be imported from this module under the name 'no_aa_xfrm':

  use No::Sort qw(no_sort no_aa_xfrm);
  @sorted = no_sort \&no_aa_xfrm, @names;

If you set the $No::Sort::DEBUG variable to a TRUE value, then we will make some extra noise on STDOUT while sorting.

The module can also export functions for up/down casing ISO-8859/1 strings. These functions are called latin1_uc(), latin1_lc(), latin1_ucfirst(), latin1_lcfirst().

SEE ALSO

Top

perllocale

AUTHORS

Top

Hallvard B Furuseth <h.b.furuseth@usit.uio.no>, Gisle Aas <gisle@aas.no>


Norge documentation Contained in the Norge distribution.

package No::Sort;

require 5.002;
use strict;
use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION $DEBUG);
require Exporter;
@ISA=qw(Exporter);
@EXPORT=qw(no_sort);
@EXPORT_OK=qw(no_xfrm no_aa_xfrm
	      latin1_uc latin1_lc latin1_ucfirst latin1_lcfirst);

$VERSION = sprintf("%d.%02d", q$Revision: 1.3 $ =~ /(\d+)\.(\d+)/);


sub no_sort {
    my $xfrm;  # ref to sort hash
    if (ref $_[0]) {
	if (ref($_[0]) eq "CODE") {
	    my $code = shift;
	    @{$xfrm}{@_} = map &$code($_), @_;
	} elsif (ref($_[0]) eq "HASH") {
	    $xfrm = shift;
	}
    }
    @{$xfrm}{@_} = map no_xfrm($_), @_ unless $xfrm;

    if ($DEBUG) {
	my @s = sort { $xfrm->{$a} cmp $xfrm->{$b} || $a cmp $b } @_;
	printf STDERR "%-20s %s\n", "ORD", "SORTERES SOM";
	print STDERR "-" x 20, " ", "-" x 40, "\n";
	for (@s) {
	    printf STDERR "%-20s %s\n", $_, $xfrm->{$_};
	}
	return @s;
    }

    sort { $xfrm->{$a} cmp $xfrm->{$b} || $a cmp $b } @_;
}

sub no_xfrm {
    my $p1 = shift;

    # Ikke-alfanumeriske tegn regnes som en enkelt blank
    # (eller sikkert litt mer komplisert, f.eks whitespace -> blank,
    # punktum o.l -> et annet "lite" tegn, med blanke fjernet på begge
    # sider, osv...
    $p1 =~ tr/\0-\040\177\200-\240/ /s;
    $p1 =~ tr/ 0-9_A-Za-zÀ-ÖØ-ßà-öø-ÿ/,/cs
	and $p1 =~ s/,[ ,]+/,/g;

    # Plasser æøå i riktig rekkefølge.  Tar med svensk äø også.
    # (Egentlig burde *alle* tegn transformeres slik at ting kommer i
    # riktig rekkefølge her, men da blir resten av programmet så uleselig...)
    $p1 =~ tr[æäøöåÆÄØÖÅ]
	     	     [ååææøÅÅÆÆØ];

    # Aksenter telles bare hvis uaksentede tegn er like
    my $p2 = $p1;
    $p2 =~ tr[ÀÁÂÃÄÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖÙÚÛÜÝÞßàáâãäçèéêëìíîïðñòóôõöùúûüýþÿ]
                          [AAAAÆCEEEEIIIIDNOOOOØUUUUYTSaaaaæceeeeiiiidnooooøuuuuyty];

    # Store & små bokstaver er bare forskjellig hvis alt annet er likt
    my $p3 = $p2;
    $p3 =~ tr[A-ZÆØÅ]
                          [a-zæøå];

    join("\1", $p3, $p2, $p1);
}

sub no_aa_xfrm {
      my $word = shift;
      $word =~ s/A[aA]/Å/g;
      $word =~ s/aa/å/g;
      no_xfrm($word);
}

# Some additional case convertion routines that does not really have
# much to do with sorting.

sub latin1_lc
{
    my $str = shift;
    $str =~ tr[A-ZÀ-ÖØ-Þ]
	      	      [a-zà-öø-þ];
    $str;
}

sub latin1_uc
{
    my $str = shift;
    $str =~ tr[a-zà-öø-þ]
	      	      [A-ZÀ-ÖØ-Þ];
    $str;
}

sub latin1_ucfirst
{
    my $str = shift;
    $str =~ s/(.)/latin1_uc($1)/es;
    $str;
}

sub latin1_lcfirst
{
    my $str = shift;
    $str =~ s/(.)/latin1_lc($1)/es;
    $str;
}

1;