#!/usr/bin/perl
###############

$|++;
use LW;
use strict;
use Data::Dumper;

##
#   execution starts here.
##

my $VERSION = "0.4";

# test for patched libwhisker-1.2
if (eval('LW::crawl_normalize_url("/../../TEST/")') ne "TEST")
{
    print STDERR "ERROR: You need the patched version of libwhisker-1.2.\n";
    print STDERR "       The latest copy of this patch can be found at:\n";
    print STDERR "       http://www.digitaloffense.net/insurshun/\n\n";
    exit(1);
}

# read target url
my $start_url = shift() || usage();
my (%hin, %hout, %jar, %track);
my (%forms, $current_uri, $current_form, $form_count, $current_select, %randpool);
my $forms = \%forms;

LW::http_init_request(\%hin);

# configure the crawler
LW::crawl_set_config('skip_ext', ".gif .jpg .zip .tar .gz .tgz .swf .css .pdf .png .rpm .tgz .ps .jar .class .js .ppt .doc .xls .mdb ");
LW::crawl_set_config('reuse_cookies', 1);
LW::crawl_set_config('save_cookies', 1);
LW::crawl_set_config('save_offsite_urls', 1);
LW::crawl_set_config('follow_moves', 1);
LW::crawl_set_config('use_params', 1);
LW::crawl_set_config('do_head', 0);
LW::crawl_set_config('callback', 0);
LW::crawl_set_config('source_callback', \&source_parser);

# parse any extra libwhisker options
while (shift()) { 
my ($optVar, $optVal) = split(/=/);
LW::crawl_set_config($optVar, $optVal); }

print STDERR ":: starting crawler...\n";
LW::crawl($start_url, 1984, \%track, \%hin);
LW::http_reset();

print STDERR ":: crawler finished.\n";
print STDERR ":: located " . scalar(keys(%{$main::forms})) . " forms.\n";

print Data::Dumper->Dump([$main::forms], [qw(forms *ary)]);

##
#  start subroutines
##

sub usage { 
    print STDERR "usage: $0 http://www.example.com/uri/\n\n"; 
    exit(1); 
}

sub get_random {
    my $result = 0;
 
    # intitialize pool
    if (scalar(keys(%{$main::rand_pool})) == 0) 
    { 
        srand(int($$) + int(time())); 
    }
    
    # get a unique random integer
    while ($result == 0)
    {
        $result = int(rand() * 1000000);
        if ($main::rand_pool->{$result} == 1)
        {
            $result = 0;
        }
    } 
    $main::rand_pool->{$result} = 1;

    return $result;
}

sub source_parser {
    my ($rhin, $rhout) = @_;
    my %hin = %{$rhin};
    my %hout = %{$rhout};
    my $data = $hout{'whisker'}->{'data'};

    print STDERR "OUT: " . $hout{'whisker'}->{'http_resp'} . "\t";
    print STDERR "URI: " . $hin{'whisker'}->{'uri'} . "\n";

    $main::form_count = 0;
    $main::current_form = "";
    $main::current_uri = $hin{'whisker'}->{'uri'};
    
    
    if($main::current_uri =~ /\?/) { query_string_parser($main::current_uri); }
    
    LW::html_find_tags(\$data, \&tag_parser);
}

sub url_decode {
    local $_ = @_ ? shift : $_;
    defined or return;
    
    tr/+/ /;
    s/%([a-fA-F0-9]{2})/pack "H2", $1/eg;
    return $_;
}

sub query_string_parser {
    my ($uri) = @_;    
    my ($url, $qstring) = split(/\?/, $uri);

    my (%qkeys, @qpairs, $cpair, $qvar, $qval);
    my $form_key = $url . "#-#GET#-#" . get_random();

    @qpairs = split(/\&/, $qstring);
    foreach $cpair (@qpairs)
    {
        ($qvar, $qval) = split(/\=/, $cpair);
        $qvar = url_decode($qvar);
        $qval = url_decode($qval);
        $qkeys{$qvar} = $qval;
    }
    
    if(scalar(keys(%qkeys)) == 0) { return 0; }

    $main::forms->{$form_key}->[0]->[0] = $url;
    $main::forms->{$form_key}->[0]->[1] = "GET";
    $main::forms->{$form_key}->[0]->[2] = $main::current_uri;
    $main::forms->{$form_key}->[0]->[3] = 1984;
    
    foreach $qvar (keys(%qkeys))
    {
        push @{$main::forms->{$form_key}}, [$qvar, "hidden", $qval, undef];
    }
}

sub tag_parser {
    my ($tag, $rattr) = @_;
    my %attr = %{$rattr};
    
    if ($tag eq "form")
    { 
        my $action = $attr{"action"};
        my $method = $attr{"method"};
        my $form_key;

        $action =~ s/\s+//g;
        $method =~ s/\s+//g;

        if (length($action) == 0) { $action = $main::current_uri; }
        if (length($method) == 0) { $method = "GET"; }

        $main::form_count++;
        $form_key = $main::current_uri . "#-#" . $action . "#-#" . $main::form_count;

        $main::current_form = $form_key;

        if (exists($main::forms{$form_key}))
        {
            print "DUPLICATE FORM: $form_key\n";
        } else {
            print "NEW FORM: $form_key\n";
        }

        # fill in the form information
        $main::forms->{$form_key}->[0]->[0] = $action;
        $main::forms->{$form_key}->[0]->[1] = $method;
        $main::forms->{$form_key}->[0]->[2] = $main::current_uri;
        $main::forms->{$form_key}->[0]->[3] = $main::form_count;

    }

    if ($tag eq "/form")
    {
        $main::current_form = "";
    }

    if ($tag eq "input" && defined($attr{'name'}))
    {
        push @{$main::forms->{$main::current_form}}, [$attr{'name'}, $attr{'type'}, $attr{'value'}, $attr{'maxlength'}];
    }
    
    if ($tag eq "textarea" && defined($attr{'name'}))
    {
        push @{$main::forms->{$main::current_form}}, [$attr{'name'}, $attr{'type'}, $attr{'value'}, $attr{'maxlength'}];
    }
    
    if ($tag eq "select" && defined($attr{'name'}))
    {
        push @{$main::forms->{$main::current_form}}, [$attr{'name'}, 'select', $attr{'value'}, $attr{'maxlength'}];
        $main::current_select = $attr{'name'};
    }
    
    if ($tag eq "/select")
    {
        $main::current_select = "";
    }    

    if ($tag eq "option" && defined($attr{'value'}) && $main::current_select ne "")
    {
        push @{$main::forms->{$main::current_form}}, [$main::current_select, 'option', $attr{'value'}, undef];
    }

}

