Data::Search - Data structure search


Data-Search documentation Contained in the Data-Search distribution.

Index


Code Index:

NAME

Top

Data::Search - Data structure search

SYNOPSIS

Top

use Data::Search;

 $data = { ... };
 @results = datasearch( data => $data, search => 'values',
                        find => qr/string/, return => 'hashcontainer' );

DESCRIPTION

Top

datasearch - Search data structures

This function allows you to search arbitrarily large/complex data structures for particular elements. You can search hash keys, or hash/array values, for a number/string or regular expression. The datasearch function can return either the found hash keys, the found values (which could be data structures themselves) or the container of the key or value (which is always going to be a data structure)

By default, hash keys are searched, and the corresponding values are returned. To search hash or array values, specify SEARCH => 'values'. To search both values and keys, specify SEARCH => 'all'.

To find an exact match of a string, set FIND => 'string'. To use a regular expression use the qr operator: FIND => qr/^name.*/i FIND may also be a 2 element array, to search for a key-value pair.

To return the hash keys found (or the hash keys corresponding to searched values), specify RETURN => 'keys'. To return both keys and values specify RETURN => 'all'.

You can also return the data structure containing the found key/value.

To do that, specify RETURN => 'container'. This will return the immediate container, either a hash or an array reference. You can also choose to get the closest hash container (even if the value was inside an array) by specifying RETURN => 'hashcontainer'.

Similarly, you can return the closest array container (even though the value found was a hash value or hash key) by specifying RETURN => 'arraycontainer'

Also, you can get an outer container by doing RETURN => 'container:xyz' in which case the container returned would be a structure pointed to by key xyz (if found to contain the search element somewhere inside it). Please see the examples at the end of this document.

ARGUMENTS The following arguments are accepted (case-insensitively). The only mandatory arguments are DATA and FIND.

 data   => Reference of structure to search
 search => What elements to search: keys|values|all (default: keys)
 find   => Look for: string | qr/regex/ | [ key => value ]
 return => What to return: keys|values|all|
             container|hashcontainer|arraycontainer|container:key_name

RETURN VALUES

Returns a list of matching elements (could be strings or references to internal parts (hashes/arrays) of the data structure.

EXAMPLES

 my @results = datasearch( data => $ref, find => 'name' );
That will return all values pointed to by hash keys called 'name'

 my @results = datasearch( data => $ref, search => 'values',
     find => qr/alex/i, return => 'key' );
That will return all keys that point to strings that match "alex"
case insensitively.

 my @results = datasearch( data => $ref, search => 'keys',
     find => qr/_id$/, return => 'all' );
That will return all keys that end with "_id", and all values
pointed to by those keys.

 my @results = datasearch( data => $ref, return => 'container:myrecord',
                 find => [ suffix => 'Jr' ] )
That implies search=>'all', searches for a key 'suffix'
that has value 'Jr', and returns any matching hashes pointed to by a key
named myrecord (even if suffix is deep inside those hashes)


Data-Search documentation Contained in the Data-Search distribution.

package Data::Search;
use 5.005;
use strict;
use warnings;
require Exporter;
our $VERSION = '0.03';
our @ISA = qw(Exporter);
our @EXPORT= qw(datasearch);

sub datasearch {
    my $args = get_args( [qw(FIND SEARCH RETURN DATA)], @_ );

    die "FIND argument is required" unless defined $args->{FIND};
    die "DATA argument is required" unless defined $args->{DATA};

    my $sk = 1 if !$args->{SEARCH} || $args->{SEARCH} =~ /key|all/
      or ref($args->{FIND}) eq 'ARRAY';
    my $sv = 1 if $args->{SEARCH} && $args->{SEARCH} =~ /value|all/;

    my $rv = 1 if !$args->{RETURN} || $args->{RETURN} =~ /value|all/;
    my $rk = 1 if $args->{RETURN} && $args->{RETURN} =~ /key|all/;
    my $rc = $args->{RETURN} && $args->{RETURN} =~ /container/
           ? $args->{RETURN} : 0;

    my (@results, @refs, $container);
    @results = _datasearch( $args->{DATA}, $args->{FIND}, $sk, $sv, $rv, $rk,
            $rc, \@refs, undef, undef, undef, 0 );

    my @unique;
    foreach my $p ( @results ) { # Weed out duplicate references
        push @unique, $p unless ref($p) and grep { ref && $_ == $p } @unique;
    }
    return @unique;
} ## end sub datasearch

# Internal recursive function called by datasearch
sub _datasearch {
    my ($p, $f, $sk, $sv, $rv, $rk, $rc, $refs, $container, $key, $rr, $depth)
      = @_;
    # print "DEPTH  IN=$depth\n";
    my ($root) = $rc =~ /:(.+)/;
    if ( ref($p) ) {
        if ( grep { $p == $_ } @$refs ) {
            warn "Skipping duplicate reference to $p";
            return;
        }
        push @$refs, $p;
    }

    my @results;
    if ( ref($p) && $p =~ /HASH/ ) {
        $container = $p unless $rc =~ /array/;
        foreach my $k ( keys %$p ) {
            $rr = $p->{$k} if $root && $root eq $k;
            my ($f1, $f2) = ref($f) eq 'ARRAY' ? ($f->[0], $f->[1]) : $f;
            if ( $sk and ref($f1) eq 'Regexp' && $k =~ /$f1/ || $k eq $f1 ) {
                if ( ! defined $f2 or
                        ref($f2) eq 'Regexp' && $p->{$k} =~ /$f2/
                        || $p->{$k} eq $f2 ) {
                    if ( $rc ) {
                        if ( $root ) {
                            push @results, $rr if $rr;
                        } else {
                            push @results, $container;
                        }
                    } else {
                        push @results, $k if $rk;
                        push @results, $p->{$k} if $rv;
                    }
                }
            }
            if ( my @r = _datasearch( $p->{$k}, $f, $sk, $sv, $rv, $rk,
                        $rc, $refs, $container, $k, $rr, $depth+1 ) ) {
                push @results, @r;
            }
        }
    } elsif ( ref($p) && $p =~ /ARRAY/ ) {
        $container = $p unless $rc =~ /hash/;
        foreach ( @$p ) {
            if ( my @r = _datasearch( $_, $f, $sk, $sv, $rv, $rk,
                        $rc, $refs, $container, $key, $rr, $depth+1 ) ) {
                push @results, @r;
            }
        }
    } elsif ( !ref($p) && defined $p && $sv and
            ref($f) eq 'Regexp' && $p =~ /$f/ || $p eq $f ) {
        if ( $rc ) {
            if ( $root ) {
                push @results, $rr if $rr;
            } else {
                push @results, $container;
            }
        } else {
            push @results, $p if $rv;
            push @results, $key if $rk && defined $key;
        }
    }
    # print "DEPTH OUT=$depth\n";
    return @results;
} ## end sub datasearch

# Return a hash of named parameters (keys converted to upper case)
sub get_args {
    # Called as get_args(@_) or as get_args( [arg, arg2...], @_ )
    my $valid_arg_list = ( ref($_[0]) eq 'ARRAY' ? shift : '' );

    die "get_args got odd number of arguments"
      unless (@_/2 == int(@_/2));

    my $args;
    for ( my $n = 0 ; $n < $#_ ; $n += 2 ) {
        $args->{ uc $_[$n] } = $_[ $n + 1 ];
    }

    # Do argument checking, if list of valid arguments was given
    if ($valid_arg_list) {
        foreach my $arg (keys %$args) {
            die "get_args: Argument \"$arg\" is invalid"
                unless grep (/^\Q$arg\E$/, @$valid_arg_list);
        }
    }
    return $args;
}

1;