/usr/local/CPAN/Combine/Combine/GraphAlgorithm.pm


package Combine::GraphAlgorithm;

sub new {
    my $this = shift;
    my $class = ref($this) || $this;
    my $self =  {};
    $self->{NumberNodes}=0;
    $self->{Nodes} = {}; # a hash with all nodes
      #each node will have 2 lists (possibly empty): inlinks and outlinks
      #  and possibly a topic score associated: score
    bless ($self , $class);
    return $self;
}

sub addLink {
    my ($self,$fromnode, $tonode, $weight) = @_;
#    if ($fromnode == $tonode) { warn "No selflinking!"; return; }
#    if (!defined($self->{Nodes}->{$fromnode})) {
    if (! defined($weight)) { $weight = 1.0; }
#fails if link $from -> $to already added!!!
    push(@{$self->{Nodes}->{$fromnode}->{outlinks}}, $tonode);
    push(@{$self->{Nodes}->{$tonode}->{inlinks}}, $fromnode);
    ${$self->{Nodes}->{$fromnode}->{relevance}}{$tonode} += $weight;
}

sub setScore {
    my ($self,$node,$score) = @_;
    $self->{Nodes}->{$node}->{score} = $score;
}

sub nodes {
    my ($self) = @_;
    return keys(%{$self->{Nodes}});
}

sub outDegree {
    my ($self,$node) = @_;
    my @n = @{$self->{Nodes}->{$node}->{outlinks}};
    return $#n+1;
}

sub inDegree {
    my ($self,$node) = @_;
    my @n = @{$self->{Nodes}->{$node}->{inlinks}};
    return $#n+1;
}

sub hasScore {
    my ($self,$node) = @_;
    return defined($self->{Nodes}->{$node}->{score});
}

sub getScore {
    my ($self,$node) = @_;
    if ( defined($self->{Nodes}->{$node}->{score}) ) {
	return $self->{Nodes}->{$node}->{score};
    } else {return 0; }
}

sub deleteNode {
    my ($self,$node) = @_;
    delete($self->{Nodes}->{$node}->{outlinks});
    my @l = $self->linkedToBy($node);
    foreach my $n (@l) {
      #walk through list of nodes that link to me and remove my node
	my @links = $self->linksTo($n);
	@{$self->{Nodes}->{$n}->{outlinks}} = ();
	foreach my $l (@links) {
	    if ($l != $node) { push(@{$self->{Nodes}->{$n}->{outlinks}},$l); }
	}
    }
    delete($self->{Nodes}->{$node}->{inlinks});
    delete($self->{Nodes}->{$node}->{score});
    delete($self->{Nodes}->{$node}->{relevance});
    delete($self->{Nodes}->{$node});
}

sub linksTo {
    my ($self,$node) = @_;
    return @{$self->{Nodes}->{$node}->{outlinks}};
}

sub linkedToBy {
    my ($self,$node) = @_;
    return @{$self->{Nodes}->{$node}->{inlinks}};
}

sub addBackLinks {
    my ($self) = @_;
    my @pages = $self->nodes();
    foreach my $i (@pages) {
	my @backlinks = $self->linkedToBy($i);
	my @links = $self->linksTo($i);
	my %li = ();
	foreach my $l (@links) { $li{$l}=1; } #To detect duplicates
	foreach my $l (@backlinks) {
	    #$l is a backlink for $i
	    #use score divided by total no of backlinks as weight
            #what if link already exists?
	    if (!defined($li{$l})) {
		push(@{$self->{Nodes}->{$i}->{outlinks}}, $l);
	    }
	    ${$self->{Nodes}->{$i}->{relevance}}{$l} += $self->getScore($l)/($#backlinks+1);
	}
    }
}

sub PageRank {
    my ($self,$rmDang, $Bias) = @_;
    # Parameters
    # $rmDang (boolean) remove dangling pages NOT USED
    # $Bias (boolean) use bias vector when calculating PageRank
    # $d1 is an Graph object that contains bias vector as attribute
    #     to the vertices. May be modified by this subroutine!

    # Using algorithm (1) The Power method from "Deeper Inside PageRank"

    my @pages;
    my $n;
    my $i;

    my %Ri; #The pageRank vector (x(k?)T)
    my %Ri1;#The pageRank vector (x(k?)T)
    my %Ei; #Topical (personalization) vector (vT in alg)
    my $d = 0.85; #The alpha param

    @pages = $self->nodes();
    my $npages = $#pages + 1;
    print "Got $npages nodes\n";
    ########

    if ( $Bias ) {
        #Init Ei with autoclass scores
        #  maintain sum(Ei) == 1
	print "Initializing bias vector\n";
	my $totscore=0;
	foreach $i (@pages) {
	    $totscore += $self->getScore($i); 
	}
	$normconst = 1.0/$totscore;
	print "Using $normconst as normalization factor; Tot=$totscore\n";
	my $riNorm=0.0;
	foreach $i (@pages) {
	    $Ei{$i}=0.0;
	    $Ri{$i}=0.0;
	    if ( $self->hasScore($i) ) {
		$Ei{$i} = $self->getScore($i) * $normconst;
#		$Ri{$i}=1.0/$npages; #Uniform starting PageRank vector
		$Ri{$i}=$Ei{$i}; # Topic-score starting PageRank vector
	    } else { print "WARN node $i has no score\n"; }
	}
	my $eisum; foreach $i (@pages) { $eisum +=    $Ei{$i}; }
	print "EiSUM = $eisum\n";
	my $risum; foreach $i (@pages) { $risum +=    $Ri{$i}; }
	print "RiSUM = $risum\n";
    } else {
	#Uniform normalization (so that sum(Ei) = 1)
	foreach $i (@pages) { $Ri{$i}=1.0/$npages; $Ei{$i}= 1.0/$npages;}
    }

#Normalize relevance weights
    foreach $i (@pages) {
	# $i links to $l
	my $nlinks=$self->outDegree($i);
	if ($nlinks==0) {
	    next;
	}
	my @links = $self->linksTo($i); #Sparse P matrix
	my $sum=0.0;
	foreach my $l (@links) {
	    $sum += ${$self->{Nodes}->{$i}->{relevance}}{$l};
        }
#    print "Node $i: SUM=$sum\n";
        foreach my $l (@links) {
	    ${$self->{Nodes}->{$i}->{relevance}}{$l} = ${$self->{Nodes}->{$i}->{relevance}}{$l} / $sum;
            $r = ${$self->{Nodes}->{$i}->{relevance}}{$l};
#            print "  link to $l relevance=$r\n";
        }
    }

    my $loops=0;
    my $lenDelta = 1.0;
    my $Ri1sum;
    while ( ($lenDelta > 0.0000001) ) {
	$loops++;
	my $dangContr=0.0; # the term x(k-1)T*a
	foreach $i (@pages) {
	    # $i links to $l
	    my $nlinks=$self->outDegree($i);
	    if ($nlinks==0) {
		$dangContr += $Ri{$i};
		next;
	    }
	    my @links = $self->linksTo($i); #Sparse P matrix
#	    $mcontr = $Ri{$i}/($nlinks); #Uniform probability for all links
	    foreach my $l (@links) {
		#nonuniform jump probability
		my $mcontr = $Ri{$i} * (${$self->{Nodes}->{$i}->{relevance}}{$l});
		$Ri1{$l} += $mcontr; # x(k-1)T*P
#print "  Adding $mcontr to node $l making sum=$Ri1{$l}\n";
	    }
	}

	$Ri1sum=0.0;
	$lenDelta=0.0;
	foreach $i (@pages) {
	    my $tmp = $Ri{$i}; #Old PageRank
	    $Ri{$i} = $d * $Ri1{$i} + ($d * $dangContr + (1.0-$d))*$Ei{$i};
	    $Ri1sum += $Ri{$i};
	    $lenDelta = ($Ri{$i} - $tmp) * ($Ri{$i} - $tmp); #Euclidian length
	    $Ri1{$i}=0.0;
	}
	$lenDelta = sqrt($lenDelta);
#	print "Difference lenDelta=$lenDelta; Ri1sum=$Ri1sum\n";
    } #end while
    print "LOOPS: $loops; Convergence lenDelta=$lenDelta; Ri1sum=$Ri1sum\n";
    return %Ri;
}
#################
sub PageRankBL {
    my ($self,$rmDang, $Bias) = @_;
    # Parameters
    # $rmDang (boolean) remove dangling pages NOT USED
    # $Bias (boolean) use bias vector when calculating PageRank
    # $d1 is an Graph object that contains bias vector as attribute
    #     to the vertices. May be modified by this subroutine!

    # Using algorithm (1) The Power method from "Deeper Inside PageRank"
    # But doing calculations on backlinks only

    my @pages;
    my $n;
    my $i;

    my %Ri; #The pageRank vector (x(k?)T)
    my %Ri1;#The pageRank vector (x(k?)T)
    my %Ei; #Topical (personalization) vector (vT in alg)
    my $d = 0.85; #The alpha param

    @pages = $self->nodes();
    my $npages = $#pages + 1;
    print "Got $npages nodes\n";
    ########

    if ( $Bias ) {
        #Init Ei with autoclass scores
        #  maintain sum(Ei) == 1
	print "Initializing bias vector\n";
	my $totscore=0;
	foreach $i (@pages) {
	    $totscore += $self->getScore($i); 
	}
	$normconst = 1.0/$totscore;
	print "Using $normconst as normalization factor; Tot=$totscore\n";
	my $riNorm=0.0;
	foreach $i (@pages) {
	    $Ei{$i}=0.0;
	    $Ri{$i}=0.0;
	    if ( $self->hasScore($i) ) {
		$Ei{$i} = $self->getScore($i) * $normconst;
#		$Ri{$i}=1.0/$npages; #Uniform starting PageRank vector
		$Ri{$i}=$Ei{$i}; # Topic-score starting PageRank vector
	    } else { print "WARN node $i has no score\n"; }
	}
	my $eisum; foreach $i (@pages) { $eisum +=    $Ei{$i}; }
	print "EiSUM = $eisum\n";
	my $risum; foreach $i (@pages) { $risum +=    $Ri{$i}; }
	print "RiSUM = $risum\n";
    } else {
	#Uniform normalization (so that sum(Ei) = 1)
	foreach $i (@pages) { $Ri{$i}=1.0/$npages; $Ei{$i}= 1.0/$npages;}
    }

#Normalize relevance weights
    foreach $i (@pages) {
	# $i links to $l
	my $nlinks=$self->outDegree($i);
	if ($nlinks==0) {
	    next;
	}
###	my @links = $self->linksTo($i); #Sparse P matrix
	my @links = $self->linkedToBy($i); #Sparse P matrix
	my $sum=0.0;
	foreach my $l (@links) {
	    $sum += ${$self->{Nodes}->{$l}->{relevance}}{$i};
        }
#    print "Node $i: SUM=$sum\n";
        foreach my $l (@links) {
	    ${$self->{Nodes}->{$l}->{relevance}}{$i} = ${$self->{Nodes}->{$l}->{relevance}}{$i} / $sum;
            $r = ${$self->{Nodes}->{$l}->{relevance}}{$i};
#            print "  link to $l relevance=$r\n";
        }
    }

    my $loops=0;
    my $lenDelta = 1.0;
    my $Ri1sum;
    while ( ($lenDelta > 0.0000001) ) {
	$loops++;
	my $dangContr=0.0; # the term x(k-1)T*a
	foreach $i (@pages) {
	    # $i links to $l
###	    my $nlinks=$self->outDegree($i);
	    my $nlinks=$self->inDegree($i);
	    if ($nlinks==0) {
		$dangContr += $Ri{$i};
		next;
	    }
###	    my @links = $self->linksTo($i); #Sparse P matrix
	    my @links = $self->linkedToBy($i); #Sparse P matrix
#	    $mcontr = $Ri{$i}/($nlinks); #Uniform probability for all links
	    foreach my $l (@links) {
		#nonuniform jump probability
		my $mcontr = $Ri{$i} * (${$self->{Nodes}->{$l}->{relevance}}{$i});
		$Ri1{$i} += $mcontr; # x(k-1)T*P
#print "  Adding $mcontr to node $l making sum=$Ri1{$l}\n";
	    }
	}

	$Ri1sum=0.0;
	$lenDelta=0.0;
	foreach $i (@pages) {
	    my $tmp = $Ri{$i}; #Old PageRank
	    $Ri{$i} = $d * $Ri1{$i} + ($d * $dangContr + (1.0-$d))*$Ei{$i};
	    $Ri1sum += $Ri{$i};
	    $lenDelta = ($Ri{$i} - $tmp) * ($Ri{$i} - $tmp); #Euclidian length
	    $Ri1{$i}=0.0;
	}
	$lenDelta = sqrt($lenDelta);
#	print "Difference lenDelta=$lenDelta; Ri1sum=$Ri1sum\n";
    } #end while
    print "LOOPS: $loops; Convergence lenDelta=$lenDelta; Ri1sum=$Ri1sum\n";
    return %Ri;
}
#################

sub printProbMatrix {
    my ($self)=@_;
    my %P;
    foreach my $i ($self->nodes()) {
	foreach my $j ($self->nodes()) {
	    $P{$i}{$j}=0;
	}
    }
    foreach my $i ($self->nodes()) {
	foreach my $j (@{$self->{Nodes}->{$i}->{outlinks}}) {
#	    $P{$i}{$j}=1;
	    $P{$i}{$j}=${$self->{Nodes}->{$i}->{relevance}}{$j};
	}
    }
    print "P: ";
    foreach my $i ($self->nodes()) { print "$i "; }
    print "\n";
    foreach my $i ($self->nodes()) {
	print "$i: ";
	foreach my $j ($self->nodes()) {
	    my $p=$P{$i}{$j};
	    print "$p ";
	}
	print "\n";
    }
}

sub HITS {
    my ($Graph) = @_;

    my $k=5;
    my %x;
    my %y;
    my @pages = $Graph->vertices;
    #Evt initialize to rscore?
    foreach $p (@pages) { $x{$p}=1; $y{$p}=1; }
    my %yp;

    foreach $i (0..$k) {
    #I operation
        print "    #I operation\n";
	my %xp;
	foreach $p (@pages) {
	    $w=0;
	    @q = $Graph->predecessors($p);
	    foreach $q (@q) { $xp{$p} += $y{$q}; }
##	    foreach $q (0..$n) {
##		if ( $links{$q,$p} ) { $w += $y[$q]; }
##	    }
##	    $xp[$p]=$w;
##

	}
	#O operation
            print "    #O operation\n";
	my %yp;
	foreach $p (@pages) {
	    @q = $Graph->successors($p);
	    foreach $q (@q) { $yp{$p} += $xp{$q}; }
##	$w=0;
##	foreach $q (0..$n) {
##	    if ( $links{$p,$q} ) { $w += $xp[$q]; }
##	}
##	$yp[$p]=$w;

	}
	%x = Normalize(%xp);
	%y = Normalize(%yp);
#	&PrProgress($i);
    }
    my %HITS;
    foreach $i (keys(%x)) {
	$HITS{$i}{AUTH}=$x{$i};
	$HITS{$i}{HUB}=$y{$i};
	delete($y{$i});
    }
    foreach $i (keys(%y)) {
	$HITS{$i}{HUB}=$y{$i};
    }
    return %HITS;
}

sub Normalize {
    my (%arr) = @_;
    my $sum = 0.0;
    foreach my $v (keys(%arr)) { $sum += $arr{$v}*$arr{$v}; }
    my $nv = sqrt($sum);
#    print "Norm: NV=$nv; #=$#_\n";
    my %xn;
    foreach my $v (keys(%arr)) {
	$xn{$v} = $arr{$v}/$nv;
    }
    return %xn;
}
##################

sub PrProgress {
    ($it) = @_;
    my %TMx;
    my %TMy;
    print "Status iteration $it\n";
    foreach my $ii (0..$n) {
	if ($x[$ii]>0.001) {$TMx{$ii}=$x[$ii];}
	if ($y[$ii]>0.001) {$TMy{$ii}=$y[$ii];}
    }
    my $ant=0;
    foreach $ii (sort {$TMx{$b} <=> $TMx{$a};} keys(%TMx)) {
	print "X($ant): $TMx{$ii} $ii $urls[$ii]\n";
	last if ($ant++>10);
    }
    $ant=0;
    foreach $ii (sort {$TMy{$b} <=> $TMy{$a};} keys(%TMy)) {
	print "Y($ant): $TMy{$ii} $ii $urls[$ii]\n";
	last if ($ant++>10);
    }
}


############
1;