| Statistics-Cluto documentation | Contained in the Statistics-Cluto distribution. |
Statistics::Cluto - Perl binding for CLUTO
Download CLUTO from http://glaros.dtc.umn.edu/gkhome/views/cluto.
Find libcluto.a which matches your environment and place it under
your library path (or specify its path with LIBS option as shown below).
Then do:
perl Makefile.PL [LIBS='-L/where/to/find/libcluto.a -lcluto'] make make test make install
Tested with cluto-2.1.2/Darwin-i386, cluto-2.1.2/Darwin-ppc and cluto-2.1.1/Linux-i686.
use Statistics::Cluto;
use Data::Dumper;
my $c = new Statistics::Cluto;
$c->set_dense_matrix(4, 5, [
[8, 8, 0, 3, 2],
[2, 9, 9, 1, 4],
[7, 6, 1, 2, 3],
[1, 7, 8, 2, 1]
]);
$c->set_options({
rowlabels => [ 'row0', 'row1', 'row2', 'row3' ],
collabels => [ 'col0', 'col1', 'col2', 'col3', 'col4' ],
nclusters => 2,
rowmodel => CLUTO_ROWMODEL_NONE,
colmodel => CLUTO_COLMODEL_NONE,
pretty_format => 1,
});
my $clusters = $c->VP_ClusterRB;
print Dumper $clusters;
my $cluster_features = $c->V_GetClusterFeatures;
print Dumper $cluster_features;
This is a perl binding for CLUTO. Please refer to the CLUTO's manual sections 5.6 - 5.8 for details of each function. Basically, Statistics::Cluto has all corresponding methods for functions described in the manual.
Initial matrix can be set either via set_dense_matrix or via
set_sparse_matrix method.
# loading 4x5 dense matrix
#
# 1 1 0 1 1
# 1 0 0 1 0
# 0 1 1 0 0
# 0 0 1 0 0
my $c = new Statistics::Cluto;
my $nrows = 4;
my $ncols = 5;
my $rowval = [
[1, 1, 0, 0, 1],
[1, 1, 0, 1, 1],
[1, 0, 1, 1, 0],
[1, 0, 1, 0, 0]
];
$c->set_dense_matrix($nrows, $ncols, $rowval);
# loading 4x5 sparse matrix
#
# 1 1 0 1 1
# 1 0 0 1 0
# 0 1 1 0 0
# 0 0 1 0 0
my $c = new Statistics::Cluto;
my $nrows = 4;
my $ncols = 5;
my $rowval = [
[1, 1, 2, 1, 4, 1, 5, 1],
[1, 1, 4, 1],
[2, 1, 3, 1],
[3, 1]
];
$c->set_sparse_matrix($nrows, $ncols, $rowval)
Sparse matrix can also be set with set_raw_sparse_matrix,
using the data format described in the manual section 3.3, Fig 16.
# loading sparse matrix via set_raw_sparse_matrix() # # 1 1 0 1 1 # 1 0 0 1 0 # 0 1 1 0 0 # 0 0 1 0 0 my $c = new Statistics::Cluto; my $nrows = 4; my $ncols = 5; my $rowptr = [0, 4, 6, 8, 9]; my $rowind = [0, 1, 3, 4, 0, 3, 1, 2, 2]; my $rowval = [1, 1, 1, 1, 1, 1, 1, 1, 1]; $c->set_raw_sparse_matrix($nrows, $ncols, $rowptr, $rowind, $rowval);
Input parameters nrows, ncols, rowptr, rowind, rowval are
set automatically when initial matrix is loaded. All other input
parameters should be set before calling clustering functions via
set_options method. See sections 5.6 - 5.8 for necessary parameters.
$c->set_options({
rowlabels => ['row0', 'row1', 'row2', 'row3', 'row4'],
collabels => ['col0', 'col1', 'col2', 'col3', 'col4'],
nclusters => 2,
nfeatures => 2,
clfun => CLUTO_CLFUN_I2,
treetype => CLUTO_TREE_TOP,
});
CLUTO's api functions described in the manual sections from 5.6 to 5.8 can be called with methods of the same name, but without prefix "CLUTO_".
e.g. CLUTO_VP_ClusterDirect (in section 5.6.1) is named VP_ClusterDirect
in this package.
Routines with a single output parameter will return a single value / arrayref. Routines with multiple output parameters will return an array, each member of the array being the output parameters appearing in the same order as the manual.
# suppose $c is initialized with 5x5 sparse matrix:
# col0 ... col4
# row0: 2 2 0 2 2
# row1: 2 1 0 1 4
# row2: 0 2 5 0 0
# row3: 0 1 6 0 0
# row4: 2 1 0 3 4
$c->set_options({
rowlabels => ['row0', 'row1', 'row2', 'row3', 'row4'],
collabels => ['col0', 'col1', 'col2', 'col3', 'col4'],
nclusters => 2,
nfeatures => 2,
});
my $part = $c->VP_ClusterDirect;
# $part = [
# '1',
# '1',
# '0',
# '0',
# '1'
# ];
my ($internalids, $internalwgts, $externalids, $externalwgts) = $c->V_GetClusterFeatures;
# $internalids =
# [
# '2',
# '0',
# '4',
# '0'
# ]
# $internalwgts =
# [
# '1',
# '0',
# '0.598181843757629',
# '0.209491595625877'
# ]
# $externalids =
# [
# '2',
# '4',
# '2',
# '4'
# ]
# $externalwgts =
# [
# '0.5',
# '0.299090921878815',
# '0.5',
# '0.299090921878815'
# ]
Please refer to the manual for the details of the returned data structure.
When pretty_format option is set to 1, results are returned in a single
hashref, and in a (hopefully) little bit more comprehensible way.
Meaning of the returned data should be pretty much self-explanatory.
# with the same matrix and options as above...
$c->set_options({ pretty_format => 1 });
my $result = $c->VP_ClusterDirect;
# $result =
# [
# [
# { 'row' => 2, 'rowlabel' => 'row2' },
# { 'row' => 3, 'rowlabel' => 'row3' }
# ],
# [
# { 'row' => 0, 'rowlabel' => 'row0' },
# { 'row' => 1, 'rowlabel' => 'row1' },
# { 'row' => 4, 'rowlabel' => 'row4' }
# ]
# ];
$result = $c->V_GetClusterFeatures;
# $result =
# [
# [
# {
# 'discriminating' => [
# {
# 'externalwgt' => '0.5',
# 'collabel' => 'col2',
# 'externalid' => 2
# },
# {
# 'externalwgt' => '0.299090921878815',
# 'collabel' => 'col4',
# 'externalid' => 4
# }
# ],
# 'descriptive' => [
# {
# 'internalid' => 2,
# 'internalwgt' => '1',
# 'collabel' => 'col2'
# },
# {
# 'internalid' => 0,
# 'internalwgt' => '0',
# 'collabel' => 'col0'
# }
# ]
# },
# {
# 'discriminating' => [
# {
# 'externalwgt' => '0.5',
# 'collabel' => 'col2',
# 'externalid' => 2
# },
# {
# 'externalwgt' => '0.299090921878815',
# 'collabel' => 'col4',
# 'externalid' => 4
# }
# ],
# 'descriptive' => [
# {
# 'internalid' => 4,
# 'internalwgt' => '0.598181843757629',
# 'collabel' => 'col4'
# },
# {
# 'internalid' => 0,
# 'internalwgt' => '0.209491595625877',
# 'collabel' => 'col0'
# }
# ]
# }
# ]
# ];
use Statistics::Cluto qw(:all)
will export all constants defined in cluto.h. (Auto generated by
h2xs).
See section 5 of CLUTO's manual, or cluto.h for details.
CLUTO_CLFUN_CLINK CLUTO_CLFUN_CLINK_W CLUTO_CLFUN_CUT CLUTO_CLFUN_E1 CLUTO_CLFUN_G1 CLUTO_CLFUN_G1P CLUTO_CLFUN_H1 CLUTO_CLFUN_H2 CLUTO_CLFUN_I1 CLUTO_CLFUN_I2 CLUTO_CLFUN_MMCUT CLUTO_CLFUN_NCUT CLUTO_CLFUN_RCUT CLUTO_CLFUN_SLINK CLUTO_CLFUN_SLINK_W CLUTO_CLFUN_UPGMA CLUTO_CLFUN_UPGMA_W CLUTO_COLMODEL_IDF CLUTO_COLMODEL_NONE CLUTO_CSTYPE_BESTFIRST CLUTO_CSTYPE_LARGEFIRST CLUTO_CSTYPE_LARGESUBSPACEFIRST CLUTO_DBG_APROGRESS CLUTO_DBG_CCMPSTAT CLUTO_DBG_CPROGRESS CLUTO_DBG_MPROGRESS CLUTO_DBG_PROGRESS CLUTO_DBG_RPROGRESS CLUTO_GRMODEL_ASYMETRIC_DIRECT CLUTO_GRMODEL_ASYMETRIC_LINKS CLUTO_GRMODEL_EXACT_ASYMETRIC_DIRECT CLUTO_GRMODEL_EXACT_ASYMETRIC_LINKS CLUTO_GRMODEL_EXACT_SYMETRIC_DIRECT CLUTO_GRMODEL_EXACT_SYMETRIC_LINKS CLUTO_GRMODEL_INEXACT_ASYMETRIC_DIRECT CLUTO_GRMODEL_INEXACT_ASYMETRIC_LINKS CLUTO_GRMODEL_INEXACT_SYMETRIC_DIRECT CLUTO_GRMODEL_INEXACT_SYMETRIC_LINKS CLUTO_GRMODEL_NONE CLUTO_GRMODEL_SYMETRIC_DIRECT CLUTO_GRMODEL_SYMETRIC_LINKS CLUTO_MEM_NOREUSE CLUTO_MEM_REUSE CLUTO_MTYPE_HEDGE CLUTO_MTYPE_HSTAR CLUTO_MTYPE_HSTAR2 CLUTO_OPTIMIZER_MULTILEVEL CLUTO_OPTIMIZER_SINGLELEVEL CLUTO_ROWMODEL_LOG CLUTO_ROWMODEL_MAXTF CLUTO_ROWMODEL_NONE CLUTO_ROWMODEL_SQRT CLUTO_SIM_CORRCOEF CLUTO_SIM_COSINE CLUTO_SIM_EDISTANCE CLUTO_SIM_EJACCARD CLUTO_SUMMTYPE_MAXCLIQUES CLUTO_SUMMTYPE_MAXITEMSETS CLUTO_TREE_FULL CLUTO_TREE_TOP CLUTO_VER_MAJOR CLUTO_VER_MINOR CLUTO_VER_SUBMINOR
http://glaros.dtc.umn.edu/gkhome/views/cluto
Ikuhiro IHARA <tsukue@gmail.com>
Copyright (C) 2007 by Ikuhiro IHARA
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.5 or, at your option, any later version of Perl 5 you may have available.
| Statistics-Cluto documentation | Contained in the Statistics-Cluto distribution. |
package Statistics::Cluto; use 5.008005; use strict; use warnings; use Carp; require Exporter; use AutoLoader; our @ISA = qw(Exporter); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. # This allows declaration use Statistics::Cluto ':all'; # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK # will save memory. our %EXPORT_TAGS = ( 'all' => [ qw( CLUTO_CLFUN_CLINK CLUTO_CLFUN_CLINK_W CLUTO_CLFUN_CUT CLUTO_CLFUN_E1 CLUTO_CLFUN_G1 CLUTO_CLFUN_G1P CLUTO_CLFUN_H1 CLUTO_CLFUN_H2 CLUTO_CLFUN_I1 CLUTO_CLFUN_I2 CLUTO_CLFUN_MMCUT CLUTO_CLFUN_NCUT CLUTO_CLFUN_RCUT CLUTO_CLFUN_SLINK CLUTO_CLFUN_SLINK_W CLUTO_CLFUN_UPGMA CLUTO_CLFUN_UPGMA_W CLUTO_COLMODEL_IDF CLUTO_COLMODEL_NONE CLUTO_CSTYPE_BESTFIRST CLUTO_CSTYPE_LARGEFIRST CLUTO_CSTYPE_LARGESUBSPACEFIRST CLUTO_DBG_APROGRESS CLUTO_DBG_CCMPSTAT CLUTO_DBG_CPROGRESS CLUTO_DBG_MPROGRESS CLUTO_DBG_PROGRESS CLUTO_DBG_RPROGRESS CLUTO_GRMODEL_ASYMETRIC_DIRECT CLUTO_GRMODEL_ASYMETRIC_LINKS CLUTO_GRMODEL_EXACT_ASYMETRIC_DIRECT CLUTO_GRMODEL_EXACT_ASYMETRIC_LINKS CLUTO_GRMODEL_EXACT_SYMETRIC_DIRECT CLUTO_GRMODEL_EXACT_SYMETRIC_LINKS CLUTO_GRMODEL_INEXACT_ASYMETRIC_DIRECT CLUTO_GRMODEL_INEXACT_ASYMETRIC_LINKS CLUTO_GRMODEL_INEXACT_SYMETRIC_DIRECT CLUTO_GRMODEL_INEXACT_SYMETRIC_LINKS CLUTO_GRMODEL_NONE CLUTO_GRMODEL_SYMETRIC_DIRECT CLUTO_GRMODEL_SYMETRIC_LINKS CLUTO_MEM_NOREUSE CLUTO_MEM_REUSE CLUTO_MTYPE_HEDGE CLUTO_MTYPE_HSTAR CLUTO_MTYPE_HSTAR2 CLUTO_OPTIMIZER_MULTILEVEL CLUTO_OPTIMIZER_SINGLELEVEL CLUTO_ROWMODEL_LOG CLUTO_ROWMODEL_MAXTF CLUTO_ROWMODEL_NONE CLUTO_ROWMODEL_SQRT CLUTO_SIM_CORRCOEF CLUTO_SIM_COSINE CLUTO_SIM_EDISTANCE CLUTO_SIM_EJACCARD CLUTO_SUMMTYPE_MAXCLIQUES CLUTO_SUMMTYPE_MAXITEMSETS CLUTO_TREE_FULL CLUTO_TREE_TOP CLUTO_VER_MAJOR CLUTO_VER_MINOR CLUTO_VER_SUBMINOR ) ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw( CLUTO_CLFUN_CLINK CLUTO_CLFUN_CLINK_W CLUTO_CLFUN_CUT CLUTO_CLFUN_E1 CLUTO_CLFUN_G1 CLUTO_CLFUN_G1P CLUTO_CLFUN_H1 CLUTO_CLFUN_H2 CLUTO_CLFUN_I1 CLUTO_CLFUN_I2 CLUTO_CLFUN_MMCUT CLUTO_CLFUN_NCUT CLUTO_CLFUN_RCUT CLUTO_CLFUN_SLINK CLUTO_CLFUN_SLINK_W CLUTO_CLFUN_UPGMA CLUTO_CLFUN_UPGMA_W CLUTO_COLMODEL_IDF CLUTO_COLMODEL_NONE CLUTO_CSTYPE_BESTFIRST CLUTO_CSTYPE_LARGEFIRST CLUTO_CSTYPE_LARGESUBSPACEFIRST CLUTO_DBG_APROGRESS CLUTO_DBG_CCMPSTAT CLUTO_DBG_CPROGRESS CLUTO_DBG_MPROGRESS CLUTO_DBG_PROGRESS CLUTO_DBG_RPROGRESS CLUTO_GRMODEL_ASYMETRIC_DIRECT CLUTO_GRMODEL_ASYMETRIC_LINKS CLUTO_GRMODEL_EXACT_ASYMETRIC_DIRECT CLUTO_GRMODEL_EXACT_ASYMETRIC_LINKS CLUTO_GRMODEL_EXACT_SYMETRIC_DIRECT CLUTO_GRMODEL_EXACT_SYMETRIC_LINKS CLUTO_GRMODEL_INEXACT_ASYMETRIC_DIRECT CLUTO_GRMODEL_INEXACT_ASYMETRIC_LINKS CLUTO_GRMODEL_INEXACT_SYMETRIC_DIRECT CLUTO_GRMODEL_INEXACT_SYMETRIC_LINKS CLUTO_GRMODEL_NONE CLUTO_GRMODEL_SYMETRIC_DIRECT CLUTO_GRMODEL_SYMETRIC_LINKS CLUTO_MEM_NOREUSE CLUTO_MEM_REUSE CLUTO_MTYPE_HEDGE CLUTO_MTYPE_HSTAR CLUTO_MTYPE_HSTAR2 CLUTO_OPTIMIZER_MULTILEVEL CLUTO_OPTIMIZER_SINGLELEVEL CLUTO_ROWMODEL_LOG CLUTO_ROWMODEL_MAXTF CLUTO_ROWMODEL_NONE CLUTO_ROWMODEL_SQRT CLUTO_SIM_CORRCOEF CLUTO_SIM_COSINE CLUTO_SIM_EDISTANCE CLUTO_SIM_EJACCARD CLUTO_SUMMTYPE_MAXCLIQUES CLUTO_SUMMTYPE_MAXITEMSETS CLUTO_TREE_FULL CLUTO_TREE_TOP CLUTO_VER_MAJOR CLUTO_VER_MINOR CLUTO_VER_SUBMINOR ); our $VERSION = '0.01'; sub AUTOLOAD { # This AUTOLOAD is used to 'autoload' constants from the constant() # XS function. my $constname; our $AUTOLOAD; ($constname = $AUTOLOAD) =~ s/.*:://; croak "&Statistics::Cluto::constant not defined" if $constname eq 'constant'; my ($error, $val) = constant($constname); if ($error) { croak $error; } { no strict 'refs'; # Fixed between 5.005_53 and 5.005_61 #XXX if ($] >= 5.00561) { #XXX *$AUTOLOAD = sub () { $val }; #XXX } #XXX else { *$AUTOLOAD = sub { $val }; #XXX } } goto &$AUTOLOAD; } sub DESTROY {} require XSLoader; XSLoader::load('Statistics::Cluto', $VERSION); # Preloaded methods go here. our $MATRIX_TYPE_DENSE = 0; our $MATRIX_TYPE_SPARSE = 1; our $NO_OPTIMIZE_SOLUTION = 0; our $OPTIMIZE_SOLUTION = 1; sub new { my $class = shift; my $self = { nrows => 0, ncols => 0, nnz => 0, rowptr => [], rowind => [], rowval => [], # global (not method-specific) defaults simfun => CLUTO_SIM_COSINE(), cstype => CLUTO_CSTYPE_BESTFIRST(), rowmodel => CLUTO_ROWMODEL_NONE(), colprune => 1.0, nnbrs => 40, grmodel => CLUTO_GRMODEL_EXACT_SYMETRIC_DIRECT(), edgeprune => -1, vtxprune => -1, mincomponent => 5, kwayrefine => $NO_OPTIMIZE_SOLUTION, ntrials => 10, niter => 10, seed => time, dbglvl => 0, nclusters => 1, nfeatures => 5, pretty_format => 0, @_, }; bless $self, $class; return $self; } sub set_options { my ($self, $opts) = @_; while (my ($key, $val) = each(%$opts)) { $self->{$key} = $val; } } # # matrix loading functions # sub set_sparse_matrix { my ($self, $nrows, $ncols, $rowval) = @_; die ('number of rows does not match') if ($nrows != $#$rowval + 1); $self->{matrix_type} = $MATRIX_TYPE_SPARSE; $self->{nrows} = $nrows; $self->{ncols} = $ncols; $self->{nnz} = 0; my @rowptr = (); my @rowind = (); my @rowval = (); for (my $rowptr = 0; $rowptr < $nrows; $rowptr++) { my $row = $$rowval[$rowptr]; push @rowptr, $#rowind + 1; for (my $j = 0; $j <= $#$row; $j+=2) { my $col = $$row[$j]; die ("inappropriate col#$col in row#".($rowptr+1)) if ($col > $ncols); push @rowind, $col - 1; push @rowval, $$row[$j + 1]; $self->{nnz} ++; } } push @rowptr, $#rowind + 1; $self->{rowptr} = \@rowptr; $self->{rowind} = \@rowind; $self->{rowval} = \@rowval; } sub set_raw_sparse_matrix { my ($self, $nrows, $ncols, $rowptr, $rowind, $rowval) = @_; # $$rowptr[$#$rowptr + 1] = $#$rowind + 1 if ($$rowptr[-1] != $#$rowind + 1); if ($$rowptr[-1] != $#$rowind + 1 or $#$rowptr != $nrows) { die('rowptr not appropriate'); } $self->{matrix_type} = $MATRIX_TYPE_SPARSE; $self->{nrows} = $nrows; $self->{ncols} = $ncols; $self->{nnz} = $#$rowval + 1; $self->{rowptr} = $rowptr; $self->{rowind} = $rowind; $self->{rowval} = $rowval; } sub set_dense_matrix { my ($self, $nrows, $ncols, $rowval) = @_; die ('number of rows does not match') if ($nrows != $#$rowval + 1); $self->{matrix_type} = $MATRIX_TYPE_DENSE; $self->{nrows} = $nrows; $self->{ncols} = $ncols; $self->{nnz} = -1; my @rowval = (); for (my $i = 0; $i <= $#$rowval; $i++) { my $row = $$rowval[$i]; die ('number of cols does not match: row #'.($i+1)) if ($#$row+1 != $self->{ncols}); push @rowval, @$row; } $self->{rowval} = \@rowval; } sub set_dense_matrix_as_sparse { my ($self, $nrows, $ncols, $matrix) = @_; my $rowval = []; die ('number of rows does not match') if ($nrows != $#$matrix + 1); for my $row_n (0..$nrows-1) { die ('number of cols does not match: row #'.($row_n+1)) if ($#{$matrix->[$row_n]} + 1 != $ncols); $rowval->[$row_n] = []; for my $col_n (0..$ncols-1) { my $val = $matrix->[$row_n][$col_n]; if ($val) { push @{$rowval->[$row_n]}, $col_n + 1; push @{$rowval->[$row_n]}, $val; } } } $self->set_sparse_matrix($nrows, $ncols, $rowval); } # # API wrappers # sub VP_ClusterDirect { my $self = shift; # set method-specific defaults $self->{crfun} ||= CLUTO_CLFUN_I2(); $self->{colmodel} ||= ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF()); # init return values $self->{part} = []; # call xs &_VP_ClusterDirect($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{crfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{ntrials}, $self->{niter}, $self->{seed}, $self->{dbglvl}, $self->{nclusters}, $self->{part}); return $self->{pretty_format} && $self->format_cluster || $self->{part}; } sub VP_ClusterRB { my $self = shift; # set method-specific defaults $self->{crfun} ||= CLUTO_CLFUN_I2(); $self->{colmodel} ||= ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF()); # init return values $self->{part} = []; # call xs &_VP_ClusterRB($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{crfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{ntrials}, $self->{niter}, $self->{seed}, $self->{cstype}, $self->{kwayrefine}, $self->{dbglvl}, $self->{nclusters}, $self->{part}); return $self->{pretty_format} && $self->format_cluster || $self->{part}; } sub VA_Cluster { my $self = shift; # set method-specific defaults $self->{crfun} ||= CLUTO_CLFUN_UPGMA(); $self->{colmodel} ||= ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF()); # init return values $self->{part} = []; $self->{ptree} = []; $self->{tsims} = []; $self->{gains} = []; # call xs &_VA_Cluster($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{crfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{dbglvl}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{tsims}, $self->{gains}); return $self->{pretty_format} && { clusters => $self->format_cluster, tree => $self->format_tree } || map $self->{$_}, qw(part ptree tsims gains); } sub VA_ClusterBiased { my $self = shift; # set method-specific defaults $self->{crfun} ||= CLUTO_CLFUN_UPGMA(); $self->{colmodel} ||= ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF()); $self->{npclusters} = int($self->{nrows}**.5); # init return values $self->{part} = []; $self->{ptree} = []; $self->{tsims} = []; $self->{gains} = []; # call xs &_VA_ClusterBiased($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{crfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{dbglvl}, $self->{npclusters}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{tsims}, $self->{gains}); return $self->{pretty_format} && { clusters => $self->format_cluster, tree => $self->format_tree } || map $self->{$_}, qw(part ptree tsims gains); } sub SP_ClusterDirect { my $self = shift; # set method-specific defaults $self->{crfun} ||= CLUTO_CLFUN_I2(); warn ("number of rows not equal to number of cols") if ($self->{nrows} != $self->{ncols}); # init return values $self->{part} = []; # call xs &_SP_ClusterDirect($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{crfun}, $self->{ntrials}, $self->{niter}, $self->{seed}, $self->{dbglvl}, $self->{nclusters}, $self->{part}); return $self->{pretty_format} && $self->format_cluster || $self->{part}; } sub SP_ClusterRB { my $self = shift; # set method-specific defaults $self->{crfun} ||= CLUTO_CLFUN_I2(); warn ("number of rows not equal to number of cols") if ($self->{nrows} != $self->{ncols}); # init return values $self->{part} = []; # call xs &_SP_ClusterRB($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{crfun}, $self->{ntrials}, $self->{niter}, $self->{seed}, $self->{cstype}, $self->{kwayrefine}, $self->{dbglvl}, $self->{nclusters}, $self->{part}); return $self->{pretty_format} && $self->format_cluster || $self->{part}; } sub VP_GraphClusterRB { my $self = shift; # method-specific defaults $self->{crfun} ||= CLUTO_CLFUN_CUT(); $self->{colmodel} ||= ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF()); # init return values $self->{part} = []; $self->{crvalue} = 0; # call xs my $rtn = &_VP_GraphClusterRB($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{grmodel}, $self->{nnbrs}, $self->{edgeprune}, $self->{vtxprune}, $self->{mincomponent}, $self->{ntrials}, $self->{seed}, $self->{cstype}, $self->{dbglvl}, $self->{nclusters}, $self->{part}, $self->{crvalue}); return $self->{pretty_format} && $self->format_cluster || [ $rtn, $self->{part}, $self->{crvalue} ]; } sub SP_GraphClusterRB { my $self = shift; # method-specific defaults $self->{crfun} ||= CLUTO_CLFUN_CUT(); $self->{colmodel} ||= ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF()); # init return values $self->{part} = []; $self->{crvalue} = 0; # call xs my $rtn = &_SP_GraphClusterRB($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{nnbrs}, $self->{edgeprune}, $self->{vtxprune}, $self->{mincomponent}, $self->{ntrials}, $self->{seed}, $self->{cstype}, $self->{dbglvl}, $self->{nclusters}, $self->{part}, $self->{crvalue}); return $self->{pretty_format} && $self->format_cluster || [ $rtn, $self->{part}, $self->{crvalue} ]; } sub SA_Cluster { my $self = shift; # set method-specific defaults $self->{crfun} ||= CLUTO_CLFUN_UPGMA(); # init return values $self->{part} = []; $self->{ptree} = []; $self->{tsims} = []; $self->{gains} = []; # call xs &_SA_Cluster($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{crfun}, $self->{dbglvl}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{tsims}, $self->{gains}); return $self->{pretty_format} && { clusters => $self->format_cluster, tree => $self->format_tree } || map $self->{$_}, qw(part ptree tsims gains); } sub V_BuildTree { my $self = shift; # set method-specific defaults $self->{crfun} ||= CLUTO_CLFUN_I2(); $self->{colmodel} ||= ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF()); $self->{treetype} ||= CLUTO_TREE_TOP(); # init return values $self->{ptree} = []; $self->{tsims} = []; $self->{gains} = []; # call xs &_V_BuildTree($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{crfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{treetype}, $self->{dbglvl}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{tsims}, $self->{gains}); return $self->{pretty_format} && $self->format_tree || map $self->{$_}, qw(ptree tsims gains); } sub S_BuildTree { my $self = shift; # set method-specific defaults $self->{crfun} ||= CLUTO_CLFUN_I2(); warn ("number of rows not equal to number of cols") if ($self->{nrows} != $self->{ncols}); $self->{treetype} ||= CLUTO_TREE_TOP(); # init return values $self->{ptree} = []; $self->{tsims} = []; $self->{gains} = []; # call xs &_S_BuildTree($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{crfun}, $self->{treetype}, $self->{dbglvl}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{tsims}, $self->{gains}); return $self->{pretty_format} && $self->format_tree || map $self->{$_}, qw(ptree tsims gains); } sub V_GetGraph { my $self = shift; # set method-specific defaults $self->{colmodel} ||= ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF()); # init return values $self->{growptr} = []; $self->{growind} = []; $self->{growval} = []; # call xs &_V_GetGraph($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{grmodel}, $self->{nnbrs}, $self->{dbglvl}, $self->{growptr}, $self->{growind}, $self->{growval}); return map $self->{$_}, qw(growptr growind growval); } sub S_GetGraph { my $self = shift; # set method-specific defaults # init return values $self->{growptr} = []; $self->{growind} = []; $self->{growval} = []; # call xs &_S_GetGraph($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{grmodel}, $self->{nnbrs}, $self->{dbglvl}, $self->{growptr}, $self->{growind}, $self->{growval}); return map $self->{$_}, qw(growptr growind growval); } sub V_GetSolutionQuality { my $self = shift; # set method-specific defaults $self->{crfun} ||= CLUTO_CLFUN_I2(); $self->{colmodel} ||= ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF()); # init return values # call xs return &_V_GetSolutionQuality($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{crfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{nclusters}, $self->{part}); } sub S_GetSolutionQuality { my $self = shift; # set method-specific defaults $self->{crfun} ||= CLUTO_CLFUN_I2(); warn ("number of rows not equal to number of cols") if ($self->{nrows} != $self->{ncols}); # init return values # call xs return &_S_GetSolutionQuality($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{crfun}, $self->{nclusters}, $self->{part}); } sub V_GetClusterStats { my $self = shift; # set method-specific defaults # init return values $self->{pwgts} = []; $self->{cintsim} = []; $self->{cintsdev} = []; $self->{izscores} = []; $self->{cextsim} = []; $self->{cextsdev} = []; $self->{ezscores} = []; # call xs &_V_GetClusterStats($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{nclusters}, $self->{part}, $self->{pwgts}, $self->{cintsim}, $self->{cintsdev}, $self->{izscores}, $self->{cextsim}, $self->{cextsdev}, $self->{ezscores}); return $self->{pretty_format} && $self->format_cluster_stats || map $self->{$_}, qw(pwgts cintsim cintsdev izscores cextsim cextsdev ezscores); } sub S_GetClusterStats { my $self = shift; # set method-specific defaults warn ("number of rows not equal to number of cols") if ($self->{nrows} != $self->{ncols}); # init return values $self->{pwgts} = []; $self->{cintsim} = []; $self->{cintsdev} = []; $self->{izscores} = []; $self->{cextsim} = []; $self->{cextsdev} = []; $self->{ezscores} = []; # call xs &_S_GetClusterStats($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{nclusters}, $self->{part}, $self->{pwgts}, $self->{cintsim}, $self->{cintsdev}, $self->{izscores}, $self->{cextsim}, $self->{cextsdev}, $self->{ezscores}); return $self->{pretty_format} && $self->format_cluster_stats || map $self->{$_}, qw(pwgts cintsim cintsdev izscores cextsim cextsdev ezscores); } sub V_GetClusterFeatures { my $self = shift; # init return values $self->{internalids} = []; $self->{internalwgts} = []; $self->{externalids} = []; $self->{externalwgts} = []; # call xs &_V_GetClusterFeatures($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{nclusters}, $self->{part}, $self->{nfeatures}, $self->{internalids}, $self->{internalwgts}, $self->{externalids}, $self->{externalwgts}); return $self->{pretty_format} && $self->format_cluster_features || map $self->{$_}, qw(internalids internalwgts externalids externalwgts); } sub V_GetClusterSummaries { my $self = shift; # set method-specific defaults $self->{colmodel} ||= ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF()); $self->{sumtype} ||= CLUTO_SUMMTYPE_MAXCLIQUES(); # init return values $self->{r_nsum} = undef; $self->{r_spid} = []; $self->{r_swgt} = []; $self->{r_sumptr} = []; $self->{r_sumind} = []; # call xs &_V_GetClusterSummaries($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{nclusters}, $self->{part}, $self->{sumtype}, $self->{nfeatures}, $self->{r_nsum}, $self->{r_spid}, $self->{r_swgt}, $self->{r_sumptr}, $self->{r_sumind}); return $self->{pretty_format} && $self->format_cluster_summaries || map $self->{$_}, qw(r_nsum r_spid r_swgt r_sumptr r_sumind); } sub V_GetTreeStats { my $self = shift; # set method-specific defaults $self->{colmodel} ||= ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF()); warn ("ptree not set or size does not equal to 2*nclusters") if ($#{$self->{ptree}}+1 != $self->{nclusters}*2); # init return values $self->{pwgts} = []; $self->{cintsim} = []; $self->{cextsim} = []; # call xs &_V_GetTreeStats($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{pwgts}, $self->{cintsim}, $self->{cextsim}); return $self->{pretty_format} && $self->format_tree_stats || map $self->{$_}, qw(pwgts cintsim cextsim); } sub V_GetTreeFeatures { my $self = shift; # set method-specific defaults $self->{colmodel} ||= ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF()); warn ("ptree not set or size does not equal to 2*nclusters") if ($#{$self->{ptree}}+1 != $self->{nclusters}*2); # init return values $self->{internalids} = []; $self->{internalwgts} = []; $self->{externalids} = []; $self->{externalwgts} = []; # call xs &_V_GetTreeFeatures($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{nfeatures}, $self->{internalids}, $self->{internalwgts}, $self->{externalids}, $self->{externalwgts}); return $self->{pretty_format} && $self->format_tree_features || map $self->{$_}, qw(internalids internalwgts externalids externalwgts); } # # for prertty_format option # sub format_cluster { my $self = shift; my $clusters = []; for my $i (0..$self->{nrows}-1) { push @{$clusters->[$self->{part}->[$i]]}, { row => $i, rowlabel => $self->{rowlabels}->[$i] } if ($self->{part}->[$i] >= 0); } return $clusters; # return [ map { # rowlabel => $self->{rowlabels}->[$_], # cluster => $self->{part}->[$_] # }, (0..$self->{nrows}-1)] } sub format_cluster_stats { my $self = shift; return { clusters => [ map { pwgt => $self->{pwgts}->[$_], cintsim => $self->{cintsim}->[$_], cintsdev => $self->{cintsdev}->[$_], cextsim => $self->{cextsim}->[$_], cextsdev => $self->{cextsdev}->[$_], }, (0..$self->{nclusters}-1) ], rows => [ map { rowlabel => $self->{rowlabels}->[$_], izscore => $self->{izscores}->[$_], exscore => $self->{ezscores}->[$_] }, (0..$self->{nrows}-1) ] } } sub format_cluster_features { my $self = shift; return [ map { descriptive => [ map { internalid => $self->{internalids}->[$_], collabel => $self->{collabels}->[$self->{internalids}->[$_]], internalwgt => $self->{internalwgts}->[$_] }, (($_*$self->{nfeatures})..(($_ + 1)*$self->{nfeatures} - 1)) ], discriminating => [ map { externalid => $self->{externalids}->[$_], collabel => $self->{collabels}->[$self->{externalids}->[$_]], externalwgt => $self->{externalwgts}->[$_] }, (($_*$self->{nfeatures})..(($_ + 1)*$self->{nfeatures} - 1)) ], }, (0..$self->{nclusters}-1)]; } sub format_cluster_summaries { my $self = shift; return [ map { cluster => $self->{r_spid}->[$_], swgt => $self->{r_swgt}->[$_], features => [ map $self->{r_sumind}->[$_], ($self->{r_sumptr}->[$_]..($self->{r_sumptr}->[$_+1]-1)) ], }, (0..($self->{r_nsum}-1)) ]; } sub format_tree { my $self = shift; return [ map { parent => $self->{ptree}->[$_], tsims => $self->{tsims}->[$_], gains => $self->{gains}->[$_] }, (0..$#{$self->{ptree}}-1) ]; } sub format_tree_stats { my $self = shift; return [ map { cintsim => $self->{cintsim}->[$_], cextsim => $self->{cextsim}->[$_] }, (0..$self->{nclusters}*2-1) ]; } sub format_tree_features { my $self = shift; return [ map [ map { descriptive => [ map { internalid => $self->{internalids}->[$_], collabel => $self->{collabels}->[$self->{internalids}->[$_]], internalwgt => $self->{internalwgts}->[$_] }, grep defined($self->{internalids}->[$_]), (($_*$self->{nfeatures})..(($_ + 1)*$self->{nfeatures} - 1)) ], discriminating => [ map { externalid => $self->{externalids}->[$_], collabel => $self->{collabels}->[$self->{externalids}->[$_]], externalwgt => $self->{externalwgts}->[$_] }, grep defined($self->{externalids}->[$_]), (($_*$self->{nfeatures})..(($_ + 1)*$self->{nfeatures} - 1)) ], }, ($_..$_+$self->{nclusters}-1)] , (0..$self->{nclusters}*2-1) ]; } # Autoload methods go after =cut, and are processed by the autosplit program. 1; __END__ # Below is stub documentation for your module. You'd better edit it!