/usr/local/CPAN/resched/FormInput.pm


#!/usr/bin/perl -wT
# -*- cperl -*-

package FormInput;

sub taint {use Taint (); for(@_){Taint::taint($_) if defined $_} return @_;}

sub lastvals { # deprecated in favor of getforminput(multiples=>'last')
  # Very similar to ashash, but splits on /,/ and takes only the last value of each.
  my %i = %{ashash()};
  for $k (keys %i) {
    $i{$k} = (split /,/, $i{$k})[-1];
  }
  return \%i;
}
sub firstvals { # deprecated in favor of getforminput(multiples=>'last')
  # Very similar to ashash, but splits on /,/ and takes only the first value of each.
  my %i = %{ashash()};
  for $k (keys %i) {
    $i{$k} = (split /,/, $i{$k})[0];
  }
  return \%i;
}
sub HoA { # deprecated in favor of getforminput()
  # Like ashash but splits values on comma and returns a hash of arrayrefs.
  my %i = %{ashash()};
  for $k (keys %i) {
    $i{$k} = [split /,/, $i{$k}];
  }
  return \%i;
}
sub ashash { # deprecated in favor of getforminput(multiples=>'join')
  my %input;
  my ($num_bytes, $bytesread, $formdata, $name, $value, $boundary, $part, $parts, $partdebug, $partname);
  my (%head, %val, $headers, $h, %disposition, $content_disposition, $t, %partdata, %content_type);
  $num_bytes=$ENV{'CONTENT_LENGTH'};
  if (defined $num_bytes and $num_bytes > 0) {
    $bytesread = read (STDIN, $formdata, $num_bytes);
  } else {
    $formdata=$ENV{'QUERY_STRING'};
  }
  if (exists $ENV{CONTENT_TYPE} and $ENV{CONTENT_TYPE}=~/multipart\/form-data.*boundary=(.+?)$/) {
	$boundary=$1; taint($boundary);
	foreach $part (split /--$boundary/, $formdata) {
      taint($part);
	  if ($main::debug) {
		$parts++;
		$partdebug.="<li><pre>$part</pre></li>";
	  }
	  $partname='';

      ($headers, $value) = $part =~ /^(.*?)\n\s?\n(.*)$/s;
      ($head{$parts}, $val{$parts}) = taint($headers, $value);

	  foreach $h (split (/\n/, $headers)) {
        if ($h =~ /Content-Disposition: ([^;]+); (.*?)$/) {
		  taint(($content_disposition, $t)=($1,$2));
		  foreach $_ (split /; /, $t) {
            if (/^name=(.*?)$/) {
              $partname=$1; $partname=~s/\"//g; taint($partname);
              $disposition{$partname}=$content_disposition;
            } elsif (/^([^=]+)=([^=]+?)$/) {
              my ($n, $v) = ($1, $2); taint($n, $v);
              $partdata{$partname}{$n}=$v;
            } elsif ($main::debug>2) {
			  $main::dribble .= "<li>[Unrecognised name/value pair on content disposition line] <code>$_</code></li>\n";
            }
          }
        } elsif ($h=~/^Content-Type: (.*?)$/) {
          my $ctpn = $1;
          $content_type{$partname}=taint($ctpn);
		} elsif ($main::debug>2) {
		  $main::dribble .= "<li>[Unrecognised header] <pre>$h</pre></li>\n";
		}
	  }
        $input{$partname}=$value;
	}
  } else {
	foreach (split /&/, $formdata) {
      # Reverse the encoding generated by the browser for the CGI interface:
	  s/\+/ /g; # Decode spaces
	  ($name, $value) = split(/=/, $_);
	  $name =~ s/%(..)/pack("c",hex($1))/ge;  # These lines reverse the %nn encodings
	  $value =~ s/%(..)/pack("c",hex($1))/ge; # cgi does for punctuation marks and such.
      taint($name, $value);
      if (exists $input{$name}) {
        $input{$name} .= ",$value";
        # Yes, this means that if you have multiple inputs with the
        # same name (e.g., checkboxen) you dursn't have commas within
        # any of the possible values.
      } else {
        $input{$name}=$value;
      }
	}
  }
  if ($formdata) {
    return \%input;
  } else {
    return +{};
  }
}

sub getforminput {
  my %opt = @_;
  # OPTIONS:
  # multiples         - What to do if there are multiple inputs with the
  #                     same name.  By default, you get an arrayref, but
  #                     if you set this to 'first' or 'last', you'll get
  #                     the first or last value, respectively.  'join'
  #                     will "firstval,secondval,thirdval,...,lastval".
  # filename          - If true, and if the browser supplies a filename for
  #                     a file upload, send it as 'filename'.  (Multiples
  #                     are not supported by this, so you can't also have
  #                     a form element named 'filename'.)  Default is
  #                     to ignore any user-supplied filename(s), which
  #                     is generally recommended for security anyway.
  # taint             - set true if Taint.pm is available for use
  #                     and taint-checking is desirable (recommended).
  eval { use Taint; } if $opt{taint};
  die "Cannot both fold and reject multiples.\n" if $opt{fold_multiples} and $opt{reject_multiples};
  my ($formdata, %input);
  { my $num_bytes=$ENV{CONTENT_LENGTH};
    if ($num_bytes > 0) {
      $num_bytes == read (STDIN, $formdata, $num_bytes) or warn "CONTENT_LENGTH is full of lies!";
    } else {
      $formdata=$ENV{QUERY_STRING};
    }}
  if ($ENV{CONTENT_TYPE}=~/multipart\/form-data.*boundary=(.+?)$/) {
    my $boundary=$1;
    Taint::taint($boundary) if $opt{taint};
    foreach my $part (split /--$boundary/, $formdata) {
      my $partname="";
      my ($headers, $value, @moreval) = $part =~ /^(.*?)\r?\n\s?\r?\n(.*?)(?:\r?\n)?$/s;
      Taint::taint($headers, $value) if $opt{taint};
      $value=join("\n\n", ($value, @moreval)) if @moreval;
      foreach my $h (split (/\r?\n/, $headers)) {
        if ($h =~ /Content-Disposition: ([^;]+); (.*?)$/) {
          my ($content_disposition, $t)=($1,$2); Taint::taint($t) if $opt{taint};
          foreach (split /; /, $t) {
            if (/^name=(.*?)$/) {
              ($partname) = $1 =~ /\"?([^"]*)/;
              Taint::taint($partname) if $opt{taint};
            } elsif (/^filename=(.*?)$/ and $opt{filename}) {
              my ($filename) = $1 =~ /\"?([^"]*)/;
              Taint::taint($filename) if $opt{taint};
              $input{filename} = $filename; # Note that multiples aren't supported for this.
            }}
        }}
      if ($partname) {
        if ($opt{multiples} eq 'first') {
          $input{$partname} = $value unless exists $input{$partname}; # Take first value only; reject subsequent ones.
        } elsif ($opt{multiples} eq 'last') {
          $input{$partname} = $value;                                 # Take the latest value every time.
        } elsif ($opt{multiples} eq 'join') {
          $input{$partname} = (join ",", $input{$partname} , $value); # Join with commas.
        } else { #                                                    # Default: construct an arrayref if necessary.
          if (exists $input{$partname} and ref $input{$partname}) {
            push @{$input{$partname}}, $value;
          } elsif (exists $input{$partname}) {
            $input{$partname} = [$input{$partname}, $value];
          } else {
            $input{$partname} = $value;
          }}
      }}
  } else {
    foreach (split /&/, $formdata) {
      s/\+/ /g; # That's how CGI encodes spaces.
      my ($name, $value) = split(/=/, $_);
      $name =~ s/%(..)/pack("c",hex($1))/ge;  # These lines reverse the %nn encodings
      $value =~ s/%(..)/pack("c",hex($1))/ge; # CGI does for punctuation marks and such.
      Taint::taint($name, $value) if $opt{taint};
      $input{$name}=$value;
    }}

  if ($formdata) {
    return \%input;
  } else {
    return undef;
  }
}

42;