| Net-Analysis documentation | Contained in the Net-Analysis distribution. |
Net::Analysis::Listener::HTTPPipelining - another HTTP listener
This is an alternate version of N::A::L::HTTP, which has support for pipelined HTTP requests. It is experimental; eventually, it will become the default version of N::A::L::HTTP.
Net::Analysis::Listener::HTTP
Adam B. Worrall, <worrall@cpan.org>
Copyright (C) 2004 by Adam B. Worrall
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.5 or, at your option, any later version of Perl 5 you may have available.
| Net-Analysis documentation | Contained in the Net-Analysis distribution. |
package Net::Analysis::Listener::HTTPPipelining; # $Id: HTTP.pm 140 2005-10-21 16:31:29Z abworrall $ # {{{ Boilerplate use 5.008000; our $VERSION = '0.02'; use strict; use warnings; use Carp qw(carp croak confess); use Params::Validate qw(:all); use base qw(Net::Analysis::Listener::Base); # }}} use Net::Analysis::Packet qw(:pktslots :func); use Net::Analysis::TCPMonologue; use HTTP::Response; use HTTP::Request; #### Callbacks # # {{{ validate_configuration sub validate_configuration { my $self = shift; my %h = validate (@_, { v => {type => SCALAR, default => 0} }); return \%h; } # }}} # {{{ setup # A chance to setup stuff for our listener sub setup { my ($self) = shift; $self->{sesh} = {}; # TCP sessions } # }}} # {{{ teardown sub teardown { my ($self) = shift; } # }}} # {{{ tcp_session_start sub tcp_session_start { my ($self, $args) = @_; my $pkt = $args->{pkt}; # Might well be undef my $k = $args->{socketpair_key}; if ($self->{v} & 0x04) { $self->trace (" ==== tcp session start [$pkt->{from} -> $pkt->{to}]"); } } # }}} # {{{ tcp_session_end sub tcp_session_end { my ($self, $args) = @_; my $pkt = $args->{pkt}; # Might well be undef my $k = $args->{socketpair_key}; $self->trace(" ==== tcp session end [$k]") if ($self->{v} & 0x04); my $sesh = $self->_remove_sesh ($k); return if (! defined $sesh); $self->_unpipeline_data($sesh); } # }}} # {{{ tcp_monologue # In a pipelined request, the concept of orderly back-and-forth # monologues breaks down. Conceptually, all the requests are # glued together into one big monologue, and then all the responses # are returned in another big monologue. # These jumbo monlogues may be transmitted simultaneusly (that is # kinda the point of pipelining ;), so from the perspective of our # pcap files, the divisions between monologues are arbitrary. # So, we assign no meaning to the monologues that arrive; all we do # is steal the packets from them. # All the hard work - breaking apart the jumbo monologues into separate # requests and responses, pairing them up, and generating synthetic # monologues, is done in tcp_session_end. sub tcp_monologue { my ($self, $args) = @_; my $k = $args->{socketpair_key}; my $mono = $args->{monologue}; my $sesh = $self->_get_sesh ($k); # Decide which jumbo monologue to place these pkts in my $socketpair_from = $mono->first_packet()->[PKT_SLOT_FROM]; my $pkts = $sesh->{$socketpair_from} ||= []; push (@$pkts, @{ $mono->_data_packets }); # POTENTIAL MEM LEAK !! TAKE CARE } # }}} # {{{ http_transaction # Listen to our own event and print very basic report, if asked sub http_transaction { my ($self, $args) = @_; return if (! $self->{v}); my $req = $args->{req}; my $uri = (defined $req) ? $req->uri() : "(no uri)"; my $method = (defined $req) ? $req->method() : "(no method)"; my $t = $args->{t_elapsed} || -1.0; printf "%8.4fs : $method %s\n", $t, $uri; } # }}} # {{{ as_string sub as_string { my ($self) = @_; my $s = ''; $s .= "[".ref($self)."]"; return $s; } # }}} #### Support funcs # # {{{ _trace # This may become more clever ... our $TRACE=0; sub _trace { my ($self) = shift; return if (! $TRACE); foreach (@_) { my $l = $_; # Skip 'Modification of a read-only value' errors chomp ($l); print "$l\n"; } } # }}} # {{{ _{get|remove}_sesh sub _get_sesh { my ($self, $sesh_key) = @_; if (! exists $self->{sesh}{$sesh_key}) { $self->{sesh}{$sesh_key} = {}; } return $self->{sesh}{$sesh_key}; } sub _remove_sesh { my ($self, $sesh_key) = @_; return delete ($self->{sesh}{$sesh_key}); } # }}} # {{{ _printable sub _printable { my $raw = shift; $raw =~ s {([^\x20-\x7e])} {.}g; return $raw; } # }}} # {{{ _unchunk_response sub _unchunk_response { my ($resp) = @_; my $transfer_encoding = $resp->header('transfer-encoding'); return if (!$transfer_encoding); # http://www.jmarshall.com/easy/http/#http1.1c2 if ($transfer_encoding eq 'chunked') { my $chunked_data = $resp->content(); my $unchunked_data = ''; my ($chunk_size_hex, $chunk_size, $chunk); while ($chunked_data) { # Read chunk size. Discard chunking comments. ($chunk_size_hex, $chunked_data) = ($chunked_data =~ /^([0-9a-fA-F]+)(?:;.*)?\r\n(.*)/s); last if (!defined $chunk_size_hex); $chunk_size = oct("0x$chunk_size_hex"); last if ($chunk_size == 0); # Sod trailing headers! # allow for \r\n trailing the chunk $chunk = substr ($chunked_data, 0, $chunk_size+2, ''); substr ($chunk, -2, 2, ''); $unchunked_data .= $chunk; } $resp->content($unchunked_data); } } # }}} # {{{ _unpipeline_data sub _unpipeline_data { my $self = shift; my ($sesh) = @_; my @jumbo_monos; foreach my $k (keys %$sesh) { my $mono = Net::Analysis::TCPMonologue->new(); foreach my $pkt (@{ $sesh->{$k} }) { $mono->add_packet ($pkt); } push (@jumbo_monos, $mono); } my ($req_mono, $resp_mono); if ($jumbo_monos[0]->data =~ m!^(get|post|head)\s+([^ ]+)(HTTP/\d.\d)?!i) { ($req_mono, $resp_mono) = @jumbo_monos; } elsif ($jumbo_monos[1]->data =~ m!^(get|post|head)\s+([^ ]+)(HTTP/\d.\d)?!i) { ($resp_mono, $req_mono) = @jumbo_monos; } else { carp ("This TCP session doesn't look very HTTPish"); } my (@reqs) = $self->_unpipeline_http_requests ($req_mono); my (@resps) = $self->_unpipeline_http_responses ($resp_mono); if (@reqs != @resps) { carp ("found ".scalar(@reqs)." reqs but ".scalar(@resps). " resps in pipelined HTTP"); return; } while (@reqs) { my ($req_data, $req_mono) = @{ shift (@reqs) }; my ($resp_data, $resp_mono) = @{ shift (@resps) }; my ($http_req) = HTTP::Request->parse($req_data); my ($http_resp) = HTTP::Response->parse($resp_data); _unchunk_response ($http_resp); # Should port this to HTTP::Message my $host = $http_req->header('host') || '(nohost)'; my $uri = $http_req->uri() || '/noURI'; $self->_trace (">>>> $host$uri <<\n"); $self->_trace (" << ".$http_resp->code().", ". length($http_resp->content())." bytes"); my $args = {socketpair_key => $req_mono->socketpair_key(), req => $http_req, req_mono => $req_mono, resp => $http_resp, resp_mono => $resp_mono, t_end => $resp_mono->t_end()->clone()}; $args->{t_start} = $req_mono->t_start()->clone(); $args->{t_elapsed} = $args->{t_end} - $args->{t_start}; $self->emit (name => 'http_transaction', args => $args); } } # }}} # {{{ _unpipeline_http_requests sub _unpipeline_http_requests { my $self = shift; my ($mono) = @_; # rfc 2612/8.1.2.2 : "Clients SHOULD NOT pipeline requests using # non-idempotent methods" # rfc 2612/9.1.2 : (GET, HEAD, PUT, DELETE) are idempotent. # But, PUT may contain a data block (like POST), so we need to take care my @ret; my ($n_start,$n_end) = (0,0); my @blocks = (map {$_."\r\n\r\n"} split ("\r\n\r\n", $mono->data())); while (@blocks) { $_ = shift(@blocks); # PUT and POST requests have data after the request/header block; # snarf it if needed $_ .= shift(@blocks) if (/^(put|post) /i); $n_end = $n_start + length($_) - 1; #print " >> [$n_start, $n_end] ".(split("\n",$_))[0]."\n"; # Now build a mono from the packets that made up this block my $sub_mono = Net::Analysis::TCPMonologue->new(); foreach my $pkt (@{ $mono->which_pkts($n_start,$n_end) }) { $sub_mono->add_packet ($pkt); } push (@ret, [$_, $sub_mono]); $n_start = $n_end+1; } #print ">>>>[".($n_end+1)." / ".$mono->length()." ]>>>>\n"; return @ret; } # }}} # {{{ _unpipeline_http_responses # This is knottier than it looks. # I just split on what looks like the start of a response. But if a response # just so happened to contain a string that matched "HTTP/1.1 200 OK", this # breaks. # Ideally, I'd parse the Content-Length header instead, and only read the # right amount of data. But, a HEAD request generates a response with no # body data, but a content-length that indicates what the response *would* # have been (for a GET). # So: I need to know the request method in order to know whether to trust # the Content-Length header, which is altogether too much irritation for now. sub _unpipeline_http_responses { my $self = shift; my ($mono) = @_; my (@ret); my @bits = split (qr{(HTTP/\d\.\d \d\d\d)}, $mono->data()); shift (@bits); # Grr, empty field to the left of first "HTTP/1.1..." my ($n_start,$n_end) = (0,0); while (@bits) { $_ = shift (@bits); $_ .= shift (@bits); $n_end = $n_start + length($_) - 1; # print " << [$n_start, $n_end] ".(split("\n",$_))[0]."\n"; # Now build a mono from the packets that made up this block my $sub_mono = Net::Analysis::TCPMonologue->new(); foreach my $pkt (@{ $mono->which_pkts($n_start,$n_end) }) { $sub_mono->add_packet ($pkt); } push (@ret, [$_, $sub_mono]); $n_start = $n_end+1; } #print "<<<<[".($n_end+1)." / ".$mono->length()." ]<<<<\n"; return @ret; } # }}} 1; __END__ # {{{ POD
# }}} # {{{ tcp_session_end sub tcp_session_end { my ($self, $args) = @_; my $pkt = $args->{pkt}; # Might well be undef my $k = $args->{socketpair_key}; $self->trace(" ==== tcp session end [$k]") if ($self->{v} & 0x04); my $sesh = $self->_remove_sesh ($k); return if (! defined $sesh); my ($req_k, $resp_k); foreach my $k (keys %$sesh) { my $d = $sesh->{$k}->data(); # If a HTTP response happens to contain this string, this breaks $req_k = $k if ($d =~ m!^(get|post|head)\s+([^ ]+)(HTTP/\d.\d)?!i); } foreach my $k (keys %$sesh) { $resp_k = $k if ($k ne $req_k); } # Basically, I'm assuming that we got all packets in question # (e.g. we're not missing a few initial request packets). # split up all the requests ... if ($sesh->{$req_k} !~ /^(get|post|head)\s+/i) { carp "HTTP request stream in $k had leading junk\n"; } my @reqs = split ("\r\n\r\n", $sesh->{$req_k}); # ... split up all the responses if ($sesh->{$resp_k} !~ m{^http/\d.\d \d.\d\s+}i) { carp "HTTP response stream in $k had leading junk\n"; } my @t_resps = split (qr{(http/\d.\d \d{3}\s+[^\n]+)}i, $sesh->{$resp_k}); shift(@t_resps); # LHS of the first split is the empty string # Recombine the elements, to glue the split field (HTTP ...) onto the data my @resps; while (@t_resps) { push (@resps, shift(@t_resps).shift(@t_resps)); } if (@reqs != @resps) { carp "number of HTTP reqs not the same as HTTP resps"; } while (@reqs) { my $d_req = shift (@reqs); my $d_resp = shift (@resps); #print "$d_req\n"; my $req = HTTP::Request->parse ($d_req); my $resp = HTTP::Response->parse($d_resp); _unchunk_response ($resp); # Should really port this to HTTP::Message my $host = $req->header('host') || '(nohost)'; my $uri = $req->uri() || '/noURI'; $self->_trace (">>>> $host$uri <<\n"); $self->_trace (" << ".$resp->code().", ". length($resp->content())." bytes"); my $args = {socketpair_key => $k, req => $req, #req_mono => $req_mono, resp => $resp, #resp_mono => $mono, #t_end => $mono->t_end()->clone(), }; #if (defined $req_mono) { # $args->{t_start} = $sesh->{req_mono}->t_start()->clone(); # $args->{t_elapsed} = $args->{t_end} - $args->{t_start}; #} $self->emit (name => 'http_transaction', args => $args); } } # }}} # {{{ tcp_monologue2 sub tcp_monologue2 { my ($self, $args) = @_; my $k = $args->{socketpair_key}; my $mono = $args->{monologue}; my $sesh = $self->_get_sesh ($k); my $d = $mono->{data}; my ($l) = (split('\n', $d))[0]; my ($first_line) = ''; if (defined $l) { $l = substr($l,0,40) if (length($l) > 40); $first_line = _printable($l); } our $TRACE=0; # $TRACE = 1 if ($k eq '10.6.94.7:8080-159.206.22.101:2647'); # if ($k eq '10.6.94.7:8080-159.206.22.101:2647') { # print "mono $k ".$mono->first_packet()->{time}."\n"; # } if ($d =~ m!^(get|post|head)\s+([^ ]+)(HTTP/\d.\d)?!i) { if (exists $sesh->{req}) { carp "already have a req for $k, overwriting it\n"; } $sesh->{req} = HTTP::Request->parse($d); $sesh->{req_mono} = $mono; # Careful ! Must delete this ... my $host = $sesh->{req}->header('host') || '(nohost)'; my $uri = $sesh->{req}->uri() || '/noURI'; $self->_trace (">>!> $host$uri <<\n"); } elsif ($d =~ m!^HTTP/\d.\d\s+(\d{3})!i) { my $resp = HTTP::Response->parse($d); _unchunk_response ($resp); # Should really port this to HTTP::Message if (defined $sesh->{req}) { my $host = $sesh->{req}->header('host') || '(nohost)'; my $uri = $sesh->{req}->uri() || '/noURI'; $self->_trace (">>>> $host$uri <<\n"); } else { $self->_trace (">>>> ????? (no req found in sesh) <<\n"); } $self->_trace (" << ".$resp->code().", ". length($resp->content())." bytes"); my $req_mono = $sesh->{req_mono}; my $args = {socketpair_key => $k, req => $sesh->{req}, req_mono => $req_mono, resp => $resp, resp_mono => $mono, t_end => $mono->t_end()->clone()}; if (defined $req_mono) { $args->{t_start} = $sesh->{req_mono}->t_start()->clone(); $args->{t_elapsed} = $args->{t_end} - $args->{t_start}; } $self->emit (name => 'http_transaction', args => $args); delete ($sesh->{req}); delete ($sesh->{req_mono}); } else { $self->_trace ("malformed HTTP monologue in $k starts: $first_line\n"); } } # }}} # {{{ _http_message_has_body sub _http_message_has_body { my $self = shift; my ($str,$str2) = @_; if ($str =~ m!^(get|post|head|put|delete)\s+([^ ]+)(HTTP/\d.\d)?!i) { return (($1 eq 'PUT') || ($1 eq 'POST')) ? 1 : 0; } elsif ($str =~ m!^HTTP/\d.\d\s+(\d{3})!i) { return 0; } } # }}} # {{{ -------------------------={ E N D }=---------------------------------- # Local variables: # folded-file: t # end: # }}}