#!/usr/bin/perl -w
## =================================================================
## Watcher --- find suspicious packet traces in Argus
## =================================================================

=head1 NAME

Watcher -- Script to watch an argus server for scanning activity

=head1 SYNOPSIS

watcher <options>

C<-i>            Don't file incident reports (only useful if you 
are using UoA's incident logging system).

C<-H> <hostname> Host runnning argus default "$Argus::Host"

C<-P> <port num> port number to that argus is listening on

C<-W> <email address>  Who to send mail to

C<-F> <file name> read from argus file ....

C<-i> switch is only useful if you are using UoA's incident logging
system.

=head1 DESCRIPTION

This software requires a connection via ra to an Argus-2.0 server.

This script attempts to detect systematic probing of address on
a network or ports on a host from another address.

To try and find the weak signal in all the network noise it focuses
on tcp connections that did not get established and those that transfer
no application data.  We ignore various chaff eg. lone RST and SYN+ACK
which are generated by DoS attacks on third parties where our IP
addresses were spoofed.  I don't know of any tools that use such
packets for information gathering.

The script also raise the reporting threshold for scans where the 
destination addresses appear random e.g. ICQ sessions involve
hundreds of connections.

=head1 History

9th Jan 01  First distribution -- treat this as beta software
there is some very new code here.  Over Xmas I inadvertantly broke
the code that wiped the 'memory' periodically and found that
the scanner became much more sensitive -- what surprised me was that
it kept running for a week before I killed it.  At that time it was
using about 50MB memory.  I have added code and a new parameter
to control how many IP address the system will track.  The list is
purged periodically on a least recently seen basis. 

The script is still under active developement as I explore the
new features of Argus 2.0

=cut

# Support for the UoA incident logging system

#BEGIN { require '/home/argus/web/lib/init.pl'; }
#use Incidents;


#^# use lib '$Argus::Home/lib';  
use lib '/home/argus/lib';  

use POSIX qw(strftime);
use strict;
use Getopt::Std;
use Time::Local;
use Argus;
use Argus::Watcher;

use vars qw($opt_H $opt_F $opt_P $opt_W  $opt_i $Report_index %Rate_threshold
            $RF_modtime );

getopts("iH:P:W:F:") || die "Invalid options found";

#               reporting thresholds
# -i            Don't file incident reports (set by -t)
# -H <hostname> Host runnning argus default "localhost"
# -P <port num> port number to that argus is listening on
# -W <email address>  Who to send mail to
# -F <file name> read from argus file ....
#

Argus::setup_logging('Watcher');

#WHO gets reports...

my $WHO = $opt_W || $Argus::Notify;

if( defined $opt_i or
    ( defined $Report_index and! -f $Report_index) ) {  
        undef $Report_index;   # don't write reports to disk
} 

my $ArgusHost = $opt_H || $Argus::Def_Host;
my $ArgusPort = $opt_P || $Argus::Def_Port;

$RF_modtime = 0;
get_rates();      # read rate file if it exists

my $start = time;
my %sources ;
my %sources_packets;
my %report;
my @summaries;

my $COUNT = 0;
my $FTIME = 1;  # first time we saw traffic from this IP
my $RTIME = 2;  # last time we *sent* a report for this IP
my $LTIME = 3;  # last time this IP went over reporting threshold
my $TYPE  = 4;

# ra has a memory leak (by design) since it caches all IPs and their ascii
# translations.  We restart the ra process once a day to stop it growing
# too big.

my ($day, $t );
my $count = 0;
my $first = 1;
my $sources = 0;


for(;;) {  # loop forever go around once per day
    $day = (localtime(time))[3];
    $count = 0;

    if ( defined $opt_F ) {  # read from a file
       open(RA, "$Argus::Client_path/ra -F $Argus::Home/lib/ra.conf -u -Zs ".
	        " -r $opt_F  $Argus::Watcher::RA_Filter|") ||
	   die "Can't open '$opt_F:$!";
    }   else {  # connect to a live server
       if (! open(RA, "$Argus::Client_path/ra -F $Argus::Home/lib/ra.conf -u".
		      " -Zs -I -S $ArgusHost -P $ArgusPort " .
		      "$Argus::Watcher::RA_Filter|") ) {
	   Argus::mail( $WHO, "Watcher failed to start ra -- exiting\n");
	   exit;
       }
    }

    while(<RA>) {  # read data from RA
	$count ++;
	chomp;
	store( $_ );

	if (((my $t=time) - $start) > $Argus::Watcher::Period ) {
	    check_sources();
	    last if( $day ne (localtime($t))[3] );  # over midnight...
	}
    }
    close(RA); # kill off current ra process

    if( $count == 0 ) { # no data read!
	Argus::mail( $WHO, "Watcher failed to start/restart", 
	      "ra returned no records -- exiting\n");
	exit;
    }

# prune report list -- dump everthing over a day old

    my $t = time;

    foreach my $ip ( keys %report ) {
	delete $report{$ip} if ($report{$ip}->[$LTIME] - $t) > 86400;
    }

#send summaries

    Argus::mail( $WHO, "watcher keep alive", 
		 "Just to remind you I'm still here watching\n", @summaries);
    undef @summaries;

}

exit;

######################################################################

# store takes a tab delimited argus record and then splits it into 
# individual variables, data is filtered on tcp flags and a few other
# things (eg traceroutes)  and what remains is recorded in the 
# %sources hash.

sub store {
   my ($atime, $flags, $proto, $source, $sp, $dir, $Dest, $dp,  $state) =
       split (/\t/, shift);
   my $dest;

   if( ! defined $state ) { 
       s/\t/,/g;
       Argus::Log("Bad rec '$_'");
   }
   return if $source eq "255.255.255.255";  # ignore broadcasts

# ignore traffic for recently reported addresses

   return if(defined $report{$source} and 
             (time - $report{$source}->[$RTIME]) < $Argus::Watcher::ReReport);

   if( $flags =~ /f/ ) {  # unmatched fragments
       $dp = 'frag';
   }

# $dest has form IP.protocol-port number  eg. 130.216.1.1.tcp-80

    if( $proto eq 'tcp' ) { # ignore lone resets and normal FINs 
	                    # and anything with a syn+Ack
	return if $state =~ /^F\w*A|^R|^S.?PA/; 
	$dest .= "$Dest.tcp-$dp";
    } elsif( $proto eq 'udp' or $proto eq 'rtp' ) { 

        # ignore traceroutes and bidirectional traffic

        return if(  $state ne 'INT' or ($dp ne 'frag' and
              ($dp > 33400 and $dp < 33600 )));

	$dest .= "$Dest.udp-$dp";
    }elsif( $proto eq 'icmp' ) { # Ignore timeouts and unreachables    
	return if $state eq 'TIM' or $state =~ /^UR/; 
	$dest .= "$Dest.icmp-$state";
    }elsif( $proto eq 'ip' ) {  # must be a fragment
	$dest .= "$Dest.ip";
    } else {
        $dest .= "$Dest.$proto";
    }

   my $t = time();

    if (!exists $sources{$source}) {  # seen traffic from this source?
	$sources++;
	$sources{$source} = {};
	$sources{$source}->{Time} = $sources{$source}->{FTime} = $atime;
    }
   $sources{$source}->{LTime} = $t;
   $sources{$source}->{$dest} = $t;   

    ## Only store packets for things that have had lots of pairs

    if (scalar keys %{$sources{$source}} >
	get_interest_threshold($source) - 10 ) {
	if (!exists $sources_packets{$source}) {
	    $sources_packets{$source} = [];
	}
	if ( @{$sources_packets{$source}} < $Argus::Watcher::Packet_Save) {
           push(@{$sources_packets{$source}}, join("\t", $atime,
                         $proto, $source, $sp, $dir,
                         $Dest, $dp, $state ));
	} else {  # may report here ?
            get_rates();
	    report($source, $sources{$source}, $sources_packets{$source});
	    delete $sources{$source};
	    delete $sources_packets{$source};
	}
    }
}

# Get rates checks to see if the rates file has changed and rereads it if it has

sub get_rates{
    my %Unit = (
               'sec'  => 1,
               'second'  => 1,
               'min' => 60,
               'minute' => 60,
               'hour' => 3600,
               'day'  => 84600
          );


    if( defined $Argus::Watcher::RateFile and -r $Argus::Watcher::RateFile ) {
      my @stat = stat $Argus::Watcher::RateFile;

      return if $stat[9] <= $RF_modtime;

      $RF_modtime = $stat[9];

      if( ! open(RF, $Argus::Watcher::RateFile) ) {
          Argus::Log("Failed to open rate file '$Argus::Watcher::RateFile':$!");
          return;
      }

      while (<RF>) {
          chomp;
          next if /^\s*#|^\s*$/;   # comment
          s/#.*//;           # trailing comment
          my($addr, $rate, $per, $unit) = /^\s*(\S+)\s+(\d+)(\s+per\s+)?(\w+)?/;
          if( ! defined $rate ) {
            Argus::Log("$_ is not valid line in rates file - ignored");
              next;
          }

          if( defined $unit ) {
              if( defined $Unit{$unit} ) {
                  $rate = $rate/$Unit{$unit};  # convert to rate/sec
              } else {
                  Argus::Log( "'$unit' not valid unit in '$_'");
              }
          } else { $unit = 'sec' }

          $Rate_threshold{$addr} = $rate;
#         Argus::Log(join("\t", $addr, $rate, $unit, $rate));
      }
      close(RF);
    }
}



# we sort list into Least Recently Seen order so pruning the end
# deletes the flows that we have not seen data for for the longest
# period

sub sortlt{
    return $sources{$::b}->{LTime} <=> $sources{$::a}->{LTime} ;
}

# cycle through the %remote hash and report any sites over the activity
# threshold and prune the list

sub check_sources {
    my $c = 0;
    my $tt;
 
    get_rates();  # check the rate file first...

    foreach my $ip (sort sortlt keys %sources) {
	$c++;
	my $t = time;
	if( scalar keys %{$sources{$ip}} > get_interest_threshold($ip) ) {
	    report( $ip, $sources{$ip}, $sources_packets{$ip});
	    delete $sources{$ip} ;
	    delete $sources_packets{$ip};
	} elsif ($c > $Argus::Watcher::Sources ) {  # discard LRA    
	    delete $sources{$ip} ;
	    delete $sources_packets{$ip};
	} else {
	    $tt = $sources{$ip}->{LTime};
	}
    }

    $start = time();
}

sub report {
    my ($ip, $source, $sources_packets ) = @_;
    my $time = time();
    my( $who, $shorthn, $pktime, $when, $duration, $rate, $units );

# check rate threshold and return if less
    if( my $p =  scalar @$sources_packets ) {
      my ($ist, $r1) = split(/\t/, $sources_packets->[0], 2);
      my ($last, $r2) = split(/\t/, $sources_packets->[$p-1], 2);

      my $d = $last - $ist;
      if ( $d <= 0 ) {
          $d = 1;
      }

      $rate = scalar @$sources_packets/$d;

      if( defined $Rate_threshold{$ip} ) {
          return if $rate < $Rate_threshold{$ip} ;
      }
    }

    if( defined $report{$ip} ) {  # existing 
	$report{$ip}->[$LTIME] = $time;
	return if(($time - $report{$ip}->[$RTIME])<$Argus::Watcher::ReReport);
    } else {
	$report{$ip} = [];
	$report{$ip}->[$COUNT] = 0;
	$report{$ip}->[$FTIME] = $time;
    }

    $report{$ip}->[$LTIME] = $time;    
    $report{$ip}->[$RTIME] = $time;
    $report{$ip}->[$COUNT]++;

    $pktime = $source->{FTime};

    if (!defined  $pktime ||  $pktime == -1 ) {
	$when = $source->{Time};  # time first packet seen
	$when = $time if (! defined $when) ;
    }
    else { $when = $pktime }


    delete $source->{Time};    # so it does not interfer with sort
    delete $source->{FTime};      # ditto
    delete $source->{LTime};      # ditto
    delete $source->{PTime};      # ditto

    my $c = scalar keys %{$source};  # now only addresses
    my @lt = localtime($when);
    my $day =  strftime("%Y.%m.%d",  @lt);
    my $t = strftime("%H.00",  @lt);
# Host name 
    $who = gethostbyaddr(pack('C4', split(/\./, $ip)), 2) || '';
	
    if($who =~ /$Argus::Local_domain_re/o) {
	$shorthn = length($who) > 20 ? substr($who,0,20).'*' :
	    $who;
    }else{
	$shorthn = length($who) > 20 ? '*'.substr($who,-20) : 
	    $who;
    }
    my $local = $who =~ /$Argus::LocalTimeZone_re/i;

    $who .= "[$ip]";
    $shorthn .= "[$ip]";
    
    my $descr;
    
# build report body by summarising the desination IPs and ports
    my @lines = Argus::list_ips($ip, \$descr,
				[sort Argus::cmp_ip keys %{$source} ]);

    $units = "per second";

    if( $rate < 1 ) {
	$rate *= 60;
	$units = "per minute";
    }
    if( $rate < 1 ) {
	$rate *= 60;
	$units = "per hour";
    }
    if( $rate < 1 ) {
	$rate *= 24;
	$units = "per day";
    }

    my $fr = sprintf("%d $units", $rate);

# we just write a summary for some types of activity
    if( $descr =~ /ignore/ ) {  
      push( @summaries,  strftime("%a %d %b %Y at %H:%M (%Z)", 
                              localtime($when)) . " $who - $fr - $descr\n");
      return;
    }
      
    my @report ;  # construct the report
    
    push(@report, "The data for around this time can be found in\n");
    push(@report,  "~argus/data/$day/argus-$day.$t.gz\n\n");
    
    push(@report,  "We saw $who talk to $c ports/addresses(s)\n");
    if ( $local ) {  # in the local timezone - don't bother with UTC
	push(@report,  "on " . strftime("%a %d %b %Y at %H:%M (%Z)",
					localtime($when)) . "\n\n");
    } else {
	push(@report,  "on " . strftime("%a %d %b %Y at %H:%M (UTC)",
					gmtime($when)) . "\n\n");
	push(@report,  "-- " . strftime("%a %d %b %Y at %H:%M (%Z)",
					localtime($when)) . "\n\n");
    }
    
    push(@report, "Connection rate approx $fr\n\n");

    push(@report, Argus::print_block( @lines ));

    my $TZ = 1200 + (localtime($when))[8] * 100;

    if (defined $sources_packets) {
	push(@report, "\n\nSome sample packet traces were: " .
	     " Times UTC +$TZ $Argus::Accuracy");
      push(@report, "\n");
      foreach my $rec (@{$sources_packets}) {
          my @r = split(/\t/, $rec);
          my $t = shift @r;
          $t = strftime("%G-%m-%d-%T", localtime($t));
          push(@report, sprintf("%19s %4s %15s:%-6s  %3s %15s:%-6s %s\n",
                                $t, @r));
      }
      push(@report, "\n");
    }
	
# and mail it
  Argus::mail($WHO, "$shorthn - $descr\n", @report);

# and possibly write it to disk
  Incidents::add("$who\t$when", "$day.$t", $descr, @report)
      if defined $Report_index;
    
}


sub get_interest_threshold {
    my $i = shift;

    return ($i =~ /$Argus::Local_IP_re/o) ? 
	$Argus::Watcher::Local_Interest_Threshold :
	$Argus::Watcher::Remote_Interest_Threshold;
}


## =================================================================
