| IDS-Algorithms documentation | Contained in the IDS-Algorithms distribution. |
IDS::Algorithm::KruegelVigna - an IDS algorithm based on the Kruegel and Vigna paper (SEE ALSO).
A usage synopsis would go here. Since it is not here, read on.
See IDS::Algorithm.pm docs for any functions not described here.
This algorithm is HTTP-specific and will not work with any other data source.
This algorithm requires two passes over the training data to function properly.
Create the object for the algorithm. If the parameters are supplied, they are used; otherwise everything is defaults (unsurprisingly). If a filehandle is supplied, the filehandle is taken as the source for a load operation.
Copyright 2005-2007, Kenneth Ingham. All rights reserved.
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
Address bug reports and comments to: ids_test at i-pi.com. When sending bug reports, please provide the versions of IDS::Test.pm, IDS::Algorithm.pm, IDS::DataSource.pm, the version of Perl, and the name and version of the operating system you are using. Since Kenneth is a PhD student, the speed of the response depends on how the research is proceeding.
Bugs: please report any bugs you find to the address above.
IDS::Test, IDS::DataSource, IDS::Algorithm
"Anomaly detection of web-based attacks" by Christopher Kruegel and Giovanni Vigna, pages 251--261 in Proceedings of the 10th ACM conference on computer and communications security, ACM Press, 2003, ISBN 1-58113-738-9. http://doi.acm.org/10.1145/948109.948144
libAnomaly, by Darren Mutz, Wil Robertson, Fredrik Valeur, Christopher Kruegel, Giovanni Vigna, and Richard Kemmerer. http://www.cs.ucsb.edu/~rsg/libAnomaly/index.html
| IDS-Algorithms documentation | Contained in the IDS-Algorithms distribution. |
package IDS::Algorithm::KruegelVigna; use base qw(IDS::Algorithm); $IDS::Algorithm::KruegelVigna::VERSION = "1.0";
use strict; use warnings; use IDS::Algorithm::Length; use IDS::Algorithm::KVCharDist; use IDS::Algorithm::Order; use IDS::Algorithm::Presence; use IDS::Algorithm::EnumOrRandom; use IDS::Algorithm::MM; use Carp qw(cluck carp confess);
# Names of the six sub-models, in the order of sections 4.1-4.6 of the
# Kruegel and Vigna paper.  Each name is also the key under which the
# corresponding sub-object is stored in $self, and the file name used
# for it by save() and load().
sub _kv_model_names {
    return qw(length chardist markov enumorrandom presence order);
}

# Constructor.
#
# Arguments are passed unchanged to handle_parameters (inherited); if it
# returns a defined source, the state is loaded from it.
#
# Part of the logic here seems kind of backwards, but we cannot load
# until parameters have been loaded.  Some of the parameters may affect
# how we load.  The load itself is deferred until the sub-model objects
# exist, because load() delegates to each of them.
sub new {
    my $invocant = shift;
    my $class = ref($invocant) || $invocant;

    # The object must be blessed before we call handle_parameters.
    my $self = bless({ }, $class);

    $self->default_parameters;
    my $source = $self->handle_parameters(@_);

    $self->{"length"}       = IDS::Algorithm::Length->new;       # 4.1
    $self->{"chardist"}     = IDS::Algorithm::KVCharDist->new;   # 4.2
    $self->{"markov"}       = IDS::Algorithm::MM->new;           # 4.3
    $self->{"enumorrandom"} = IDS::Algorithm::EnumOrRandom->new; # 4.4
    $self->{"presence"}     = IDS::Algorithm::Presence->new;     # 4.5
    $self->{"order"}        = IDS::Algorithm::Order->new;        # 4.6

    # weights are per algorithm.  We store the minimum similarity, since
    # the test framework has everything in [0,1] with 0 being anomalous
    $self->{"worst"} = { map { ($_ => 1.0) } _kv_model_names() };

    # unlikely to occur due to IDS::Test framework
    $self->load($source) if defined($source);

    return $self;
}

# Return the option specification list (option spec => reference to the
# storage location) for this algorithm and all of its sub-models.
#
# Sub-models that have not been created yet are skipped: new() calls
# handle_parameters before it creates them.
sub param_options {
    my $self = shift;
    my $params = $self->{"params"} ||= {};

    # Local parameters last, to override the state file from the
    # sub-objects
    return (
        (map  { $self->{$_}->param_options }
         grep { defined($self->{$_}) } _kv_model_names()),
        "kv_verbose=i" => \$params->{"verbose"},
        "ids_state=s"  => \$params->{"state_file"},
        "kvfudge=f"    => \$params->{"fudge"},
        "MMmap_chars"  => \$params->{"map_chars"},
    );
}

# Reset this algorithm's own parameters to their default values.
sub default_parameters {
    my $self = shift;

    %{$self->{"params"}} = (
        "verbose"    => 0,
        "state_file" => 0,
        "fudge"      => 0.10,
        "map_chars"  => 0,
    );
}

# Get or set parameters.
#
#   parameters()                 returns all parameters of this object
#                                followed by those of every existing
#                                sub-model (list, or array ref in scalar
#                                context)
#   parameters($name, $value)    sets one parameter here and in every
#                                existing sub-model; returns the value
#                                returned by the last sub-model
#   parameters(n1 => v1, ...)    sets several parameters; returns 1
#
# Confesses when called with an odd number (other than zero) of arguments.
sub parameters {
    my $self = shift;
    my @models = grep { defined($self->{$_}) } _kv_model_names();

    if (@_ == 0) {
        my @params = %{$self->{"params"}};
        push @params, $self->{$_}->parameters for @models;
        return wantarray ? @params : \@params;
    }

    # If we are setting, set in all of the sub-objects
    # For value, return the last
    if (@_ == 2) {
        my ($name, $value) = @_;
        my $old = $self->{"params"}{$name};
        $self->{"params"}{$name} = $value;
        $old = $self->{$_}->parameters($name, $value) for @models;
        return $old;
    }

    scalar(@_) % 2 != 0 and
        confess "odd > 1 number of parameters passed to ",
            __PACKAGE__, ". See documentation for proper usage.\n";

    # If we are setting, set in all of the sub-objects
    my @pairs = @_;
    while (@pairs) {
        my ($name, $value) = splice(@pairs, 0, 2);
        $self->{"params"}{$name} = $value;
        $self->{$_}->parameters($name, $value) for @models;
    }

    return 1;
}

# Write a "name: value" table, one entry per line, to $fname.
# Confesses if the file cannot be opened or closed.
sub _kv_write_table {
    my ($fname, $table) = @_;

    open(my $fh, '>', $fname)
        or confess "Cannot open $fname for writing: $!\n";
    for my $name (sort keys %{$table || {}}) {
        print {$fh} "$name: ", $table->{$name}, "\n";
    }
    # Buffered write errors only show up at close time.
    close($fh)
        or confess "Cannot close $fname after writing: $!\n";
    return 1;
}

# Read a "name: value" table written by _kv_write_table from $fname and
# merge it into the hash referenced by $table.  Lines without a ": "
# separator are ignored.  Confesses if the file cannot be opened.
sub _kv_read_table {
    my ($fname, $table) = @_;

    open(my $fh, '<', $fname)
        or confess "Cannot open $fname for reading: $!\n";
    while (my $line = <$fh>) {
        chomp $line;
        my ($name, $value) = split(/: /, $line, 2);
        next unless defined($name) && defined($value);
        $table->{$name} = $value;
    }
    close($fh);
    return $table;
}

# Save the state of every sub-model plus the weights and bottoms.
#
# Instead of file to save in, our argument will be a dir to save in.
# The directory is created if it does not exist.  Confesses when no
# directory name is available or a file cannot be written.
sub save {
    my $self = shift;
    my $dir = $self->find_fname(shift);
    defined($dir) && $dir
        or confess __PACKAGE__ . "::save missing dir";

    unless (-d $dir) {
        warn "$dir does not exist; creating.\n";
        mkdir $dir or confess "mkdir '$dir' failed: $!\n";
    }
    $dir =~ m!/$! or $dir .= "/";

    $self->{$_}->save($dir . $_) for _kv_model_names();

    $self->calc_weights
        unless defined($self->{"weight"}) && defined($self->{"bottom"});

    _kv_write_table($dir . "weights", $self->{"weight"});
    _kv_write_table($dir . "bottoms", $self->{"bottom"});
    return 1;
}

# Load the state of every sub-model plus the weights and bottoms.
#
# Instead of file to load from, our argument will be a dir to load from.
# Confesses when no directory name is available or a file cannot be read.
sub load {
    my $self = shift;
    my $dir = $self->find_fname(shift);
    defined($dir) && $dir
        or confess __PACKAGE__ . "::load missing dir";
    $dir =~ m!/$! or $dir .= "/";

    $self->{$_}->load($dir . $_) for _kv_model_names();

    _kv_read_table($dir . "weights", $self->{"weight"} ||= {});
    _kv_read_table($dir . "bottoms", $self->{"bottom"} ||= {});
    return 1;
}

# Return the request method: the first whitespace-terminated "word" in
# the string, or undef if there is none.
sub method {
    my $self = shift;
    my $string = shift
        or confess "bug: missing string to ", __PACKAGE__ . "::method";

    # Only trust $1 after a successful match; after a failed match it
    # still holds the capture of some earlier, unrelated match.
    return $string =~ /(\S+)\s/ ? $1 : undef;
}

# Return the request path: the second "word" in the string, or undef if
# there is none.
sub path {
    my $self = shift;
    my $string = shift
        or confess "bug: missing string to ", __PACKAGE__ . "::path";

    # Assume greedy pattern matching
    return $string =~ /\S+\s+(\S+)/ ? $1 : undef;
}

# Split a request line into the pieces the sub-models work on.
#
# Returns nothing unless the request is a GET whose URI carries a
# non-empty query with at least one attribute; such requests are outside
# the scope of this algorithm.  Otherwise returns a hash ref with:
#   query   the query string
#   path    the URI up to (not including) the first '?'
#   pairs   array ref of the '&'-separated "name=value" strings
#   names   array ref: each pair with everything from the first '=' removed
#   values  array ref: each pair with everything up to the last '=' removed
#   chars   array ref of the characters of the query; letters are
#           replaced by 'x'/'X' when the map_chars parameter is set
sub _kv_parse_request {
    my ($self, $string) = @_;

    my $method = $self->method($string);
    return unless defined($method) && $method eq "GET";

    my $uri = $self->path($string);
    return unless defined($uri) && length($uri);

    my $query = $self->extract_query($uri);
    return unless defined($query) && length($query);

    my @pairs = split(/\&/, $query);
    return unless @pairs;            # e.g. "?&": nothing to examine

    my @names  = @pairs;
    my @values = @pairs;
    s/=.*// for @names;
    s/.*=// for @values;

    # For Markov model; KV mapped characters to a class of their case
    my $mm_query = $query;
    if ($self->{"params"}{"map_chars"}) {
        # A short replacement list repeats its last character.
        $mm_query =~ tr/a-z/x/;
        $mm_query =~ tr/A-Z/X/;
    }

    return {
        "query"  => $query,
        "path"   => $self->extract_qpath($uri),
        "pairs"  => \@pairs,
        "names"  => \@names,
        "values" => \@values,
        "chars"  => [ split(//, $mm_query) ],
    };
}

# Score one request.
#
# Arguments: tokens array ref (unused here), the request string, and the
# instance identifier handed on to the sub-models.
# Returns a value in [0,1]: the fraction of the six models whose score
# is above that model's "bottom" threshold (for the per-value models,
# the fraction of values above the threshold).  Requests that are not
# GETs with a query are considered normal and score 1.
sub test {
    my $self = shift;
    my $tokensref = shift
        or confess "bug: missing tokensref to ", __PACKAGE__ . "::test";
    my $string = shift
        or confess "bug: missing string to ", __PACKAGE__ . "::test";
    my $instance = shift
        or confess "bug: missing instance to ", __PACKAGE__ . "::test";
    my $verbose = $self->{"params"}{"verbose"};

    # This test is only applied to GET URIs carrying CGI queries
    # All others are considered normal
    my $req = _kv_parse_request($self, $string) or return 1;
    my @values = @{$req->{"values"}};

    $self->calc_weights
        unless defined($self->{"weight"}) && defined($self->{"bottom"});
    my $bottom = $self->{"bottom"};

    # $n counts the models consulted; every model adds at most 1 to
    # $result, so $result / $n stays within [0,1].
    my $result = 0;
    my $n = 0;

    # attribute lengths (4.1 in kruegel2003anomaly)
    my $passed = grep {
        $self->{"length"}->test(undef, $_, $instance) > $bottom->{"length"}
    } @values;
    $result += $passed / scalar(@values);
    $n++;
    print "Attr len: $result n $n\n" if $verbose;

    # character distribution (4.2 in kruegel2003anomaly)
    $passed = grep {
        $self->{"chardist"}->test(undef, $_, $instance)
            > $bottom->{"chardist"}
    } @values;
    $result += $passed / scalar(@values);
    $n++;
    print "chardist: $result n $n\n" if $verbose;

    # Markov model (4.3 in kruegel2003anomaly)
    my $mk = $self->{"markov"}->test($req->{"chars"}, undef, $instance);
    $mk = $mk > 0 ? 1 : 0; # from sec 4.3.2 in K&V
    $result += $mk > $bottom->{"markov"} ? 1 : 0;
    $n++;
    print "MM: $result n $n\n" if $verbose;

    # enumeration or random values (4.4 in kruegel2003anomaly)
    my $er = $self->{"enumorrandom"}->test(\@values, undef, $instance);
    $result += $er > $bottom->{"enumorrandom"} ? 1 : 0;
    $n++;
    print "enumorrandom: $result n $n\n" if $verbose;

    # attribute presence or absence (4.5 in kruegel2003anomaly)
    # Note non-standard IDS::Algorithm interface
    my $pr = $self->{"presence"}->test($req->{"path"}, $req->{"names"},
        $instance);
    $result += $pr > $bottom->{"presence"} ? 1 : 0;
    $n++;
    print "presence: $result n $n\n" if $verbose;

    # attribute order (4.6 in kruegel2003anomaly)
    my $o = $self->{"order"}->test(\@values, undef, $instance);
    $result += $o > $bottom->{"order"} ? 1 : 0;
    $n++;
    print "attrorder: $result n $n\n" if $verbose;

    print "return = ", $result / $n, "\n\n" if $verbose;
    return $result / $n;
}

# Derive, for every model, the "bottom" threshold (the worst score seen
# during the second training pass, lowered by the fudge parameter) and
# the corresponding weight.
#
# NOTE(review): with fudge set to 0 and a worst score of 1.0 the weight
# computation divides by zero; confirm what weight is wanted in that case.
sub calc_weights {
    my $self = shift;
    my $fudge = 1.0 - $self->{"params"}{"fudge"};

    for my $model (_kv_model_names()) {
        $self->{"bottom"}{$model} = $self->{"worst"}{$model} * $fudge;
        $self->{"weight"}{$model} = 1.0 / (1.0 - $self->{"bottom"}{$model});
    }
}

# First training pass: add one request to every sub-model.
#
# Arguments: tokens array ref (unused here), the request string, and the
# instance identifier handed on to the sub-models.  Returns 1.
sub add {
    my $self = shift;
    my $tokensref = shift
        or confess "bug: missing tokensref to ", __PACKAGE__ . "::add";
    my $string = shift
        or confess "bug: missing string to ", __PACKAGE__ . "::add";
    my $instance = shift
        or confess "bug: missing instance to ", __PACKAGE__ . "::add";
    my $verbose = $self->{"params"}{"verbose"};

    # This algorithm only applies to GET URIs carrying CGI queries
    # All others are considered normal
    my $req = _kv_parse_request($self, $string) or return 1;
    my @values = @{$req->{"values"}};

    if ($verbose) {
        print "Query: '", $req->{"query"}, "'\n";
        print "Sa @{$req->{'names'}}\n";
        print "Sv @values\n";
    }

    # attribute lengths (4.1 in kruegel2003anomaly)
    $self->{"length"}->add(undef, $_, $instance) for @values;

    # character distribution (4.2 in kruegel2003anomaly)
    $self->{"chardist"}->add(undef, $_, $instance) for @values;

    # Markov model (4.3 in kruegel2003anomaly)
    $self->{"markov"}->add($req->{"chars"}, undef, $instance);

    # enumeration or random values (4.4 in kruegel2003anomaly)
    # NOTE(review): training uses the "name=value" pairs while test()
    # and next_pass() score the bare values; confirm which of the two
    # IDS::Algorithm::EnumOrRandom expects.
    $self->{"enumorrandom"}->add($req->{"pairs"}, undef, $instance);

    # attribute presence or absence (4.5 in kruegel2003anomaly)
    # Note non-standard IDS::Algorithm interface
    $self->{"presence"}->add($req->{"path"}, $req->{"names"}, $instance);

    # attribute order (4.6 in kruegel2003anomaly)
    $self->{"order"}->add(\@values, undef, $instance);

    return 1;
}

# Second training pass: score one training request with every sub-model
# and remember the worst (lowest) score each model produced.  These
# minima are what calc_weights turns into the "bottom" thresholds.
#
# Arguments: tokens array ref (unused here), the request string, and the
# instance identifier handed on to the sub-models.  Returns 1.
sub next_pass {
    my $self = shift;
    my $tokensref = shift
        or confess "bug: missing tokensref to ", __PACKAGE__ . "::next_pass";
    my $string = shift
        or confess "bug: missing string to ", __PACKAGE__ . "::next_pass";
    my $instance = shift
        or confess "bug: missing instance to ", __PACKAGE__ . "::next_pass";

    # This algorithm only applies to GET URIs carrying CGI queries
    # All others are considered normal
    my $req = _kv_parse_request($self, $string) or return 1;
    my @values = @{$req->{"values"}};

    my $worst = $self->{"worst"} ||= {};
    my $record = sub {
        my ($model, $score) = @_;
        $worst->{$model} = 1.0 unless defined($worst->{$model});
        $worst->{$model} = $score if $score < $worst->{$model};
    };

    # attribute lengths (4.1 in kruegel2003anomaly)
    $record->("length", $self->{"length"}->test(undef, $_, $instance))
        for @values;

    # character distribution (4.2 in kruegel2003anomaly)
    $record->("chardist", $self->{"chardist"}->test(undef, $_, $instance))
        for @values;

    # Markov model (4.3 in kruegel2003anomaly)
    # Scored on the same character sequence that add() trained on.
    my $mk = $self->{"markov"}->test($req->{"chars"}, undef, $instance);
    $record->("markov", $mk > 0 ? 1 : 0); # from sec 4.3.2 in K&V

    # enumeration or random values (4.4 in kruegel2003anomaly)
    $record->("enumorrandom",
        scalar $self->{"enumorrandom"}->test(\@values, undef, $instance));

    # attribute presence or absence (4.5 in kruegel2003anomaly)
    # Note non-standard IDS::Algorithm interface
    $record->("presence",
        scalar $self->{"presence"}->test($req->{"path"}, $req->{"names"},
            $instance));

    # attribute order (4.6 in kruegel2003anomaly)
    $record->("order",
        scalar $self->{"order"}->test(\@values, undef, $instance));

    return 1;
}

# Return the query part of a URI: everything after the first '?'.
# Returns undef when the URI has no query.
sub extract_query {
    my $self = shift;
    my $uri = shift
        or confess "bug: missing uri to ", __PACKAGE__ . "::extract_query";

    if ($uri =~ /\?/) {
        # Cut at the FIRST '?', matching extract_qpath; a query may
        # itself contain further '?' characters.
        $uri =~ s/^[^?]*\?//;
        return $uri;
    }
    else {
        return undef; # no query
    }
}

# Return the path part of a URI that has a query: everything before the
# first '?'.  Returns undef when the URI has no query.
sub extract_qpath {
    my $self = shift;
    my $uri = shift
        or confess "bug: missing uri to ", __PACKAGE__ . "::extract_qpath";

    if ($uri =~ /\?/) {
        $uri =~ s/\?.*$//;
        return $uri;
    }
    else {
        return undef; # no query
    }
}

# This algorithm has no generalization step; the method does nothing
# beyond unpacking its invocant.
sub generalize {
    my $self = shift;
}
1;