For analyzing Squid access logs I like to use SquidAnalyzer (http://squidanalyzer.darold.net/). To make it easier to find, for example, naughty URLs and subsequently block them, I wrote a small Perl script. It works quite well, but still has trouble with certain edge cases (nothing critical, though; e.g. typos in URLs :o) ). The script gives a nice breakdown of which IP requested a URL, and how often, in the given period. At the end you get regex suggestions that can be added to a corresponding ACL file.
The easiest approach is to create the script in the SquidAnalyzer directory where the report files end up.
Note: Mind your users' privacy and applicable data protection rules!
Example usage:
./squidanalyzer-search.pl naughty-url 2016/03/users
Example output:
URL                            count IPs
naughty-url.com                1     192.168.125.125
naughty-url-something-else.net 1     192.168.123.123

Squid-Block-Regex
naughty\-url\.com$
naughty\-url\-something\-else\.net$
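The regex lines can go straight into a blocklist file that Squid reads via a dstdom_regex ACL. A minimal sketch of that wiring (the file path and ACL name are just placeholders picked for illustration):

# /etc/squid/blocked-domains.acl - one regex per line
naughty\-url\.com$
naughty\-url\-something\-else\.net$

# squid.conf
acl blocked_domains dstdom_regex -i "/etc/squid/blocked-domains.acl"
http_access deny blocked_domains

After editing the ACL file, reload Squid (e.g. with squid -k reconfigure) so the new patterns take effect.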
Script – squidanalyzer-search.pl
#!/usr/bin/env perl
##############
## 2016-03-09
## Script by Clemens Steinkogler (clemens[at]steinkogler.org) - looking for naughty URLs ;o)
## Copyright (C) 2016 Clemens Steinkogler
##
## This program is free software; you can redistribute it and/or modify it under the terms of the
## GNU General Public License as published by the Free Software Foundation; either version 2 of
## the License, or (at your option) any later version. This program is distributed in the hope
## that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details. You should have received a copy of the GNU General Public License along with this program;
## if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
## MA 02111-1307 USA
use strict;
use warnings;
use Data::Dump qw(dump);
use List::MoreUtils qw(uniq);
use Text::Table;
use Text::Trim qw(trim);
use Mozilla::PublicSuffix qw(public_suffix);
# everything quick and dirty :o)
die "Usage: $0 <pattern> <path>\n" unless @ARGV == 2;

my $pattern = $ARGV[0];
my $path = $ARGV[1];
chomp($pattern, $path);

my @user_urls;
my @user_urls_for_stats;
my $user_ip;
my @uniq_user_urls;
my %counts;
my $urls_stats = Text::Table->new("URL", "count", "IPs", "Squid-Block-Regex");
my $url;
my $squid_regex;
my %user_stats;

# SquidAnalyzer writes one HTML report per user IP below the given path
my @files = <$path/*/*.html>;
#print dump(@files);
# we search every file we found
foreach my $file (@files) {
    # filename = IP of the user
    $user_ip = "$file";
    $user_ip =~ s/.*\/(.*)\.html/$1/;
    #print $user_ip;

    open(my $fh, '<', $file) or do { warn "Cannot open '$file': $!"; next };

    # we are looking for the given pattern in the currently selected file
    while ( my $line = <$fh> ) {
        if ( $line =~ m/\<td\>\<a href.*\>(.*$pattern.*)\<\/a\>\<\/td\>/ ) {
            $url = $1;
            #if ( $url =~ m/www\.(.*)/) {
            #    $url = $1;
            #}
            #push @user_urls, $url;
            push @user_urls_for_stats, $url;
        }
    }
    close $fh;

    # we put the collected URLs as a value under the user's IP key
    if ( scalar @user_urls_for_stats >= 1 ) {
        @{$user_stats{"$user_ip"}{"urls"}} = @user_urls_for_stats;
        push @user_urls, @{$user_stats{"$user_ip"}{"urls"}};
    }
    undef @user_urls_for_stats;
}
#dump(@user_urls);
#dump(%user_stats);
#@uniq_user_urls = uniq(@user_urls);
#@uniq_user_urls = sort(@uniq_user_urls);
#dump(@uniq_user_urls);
$counts{$_}++ for @user_urls;
#dump(\%counts);
# now create a table: URL - how often it was found among the users - the IPs
# that browsed that URL - and the properly formatted Squid block regex
foreach my $most_browsed_url ( sort { $counts{$a} <=> $counts{$b} } keys %counts ) {
    my $match = "";

    # collect every user IP whose URL list contains this URL
    foreach my $user_stats_key ( keys %user_stats ) {
        my $tmp_match = grep { /^\Q$most_browsed_url\E$/ } @{$user_stats{$user_stats_key}{"urls"}};
        if ( $tmp_match >= 1 ) {
            $match = $match.$user_stats_key."\n";
        }
    }

    my $tld = public_suffix($most_browsed_url);
    unless ( defined $tld ) {
        warn "Invalid TLD found in '$most_browsed_url'";
        next;
    }
    unless ( $most_browsed_url =~ /([^.]+)\.\Q$tld\E$/ ) {
        warn "Couldn't find domain name in '$most_browsed_url'";
        next;
    }
    my $domainname = $1;
    #print "Domain name is $domainname.$tld\n";
    my $most_browsed_domain = "$domainname.$tld";

    # escape dots and dashes and anchor the pattern at the end of the host name
    $squid_regex = $most_browsed_domain;
    $squid_regex =~ s/\./\\./g;
    $squid_regex =~ s/-/\\-/g;
    $squid_regex = $squid_regex."\$";

    $urls_stats->add($most_browsed_url, $counts{$most_browsed_url}, $match, $squid_regex);
}
print $urls_stats->select( 0, 1, 2, )." \n";
# here we select the fourth column (index 3)
my $squid_blocks = $urls_stats->select( 3, );
# we stringify that column - this creates, in this case, an array with all column lines in one element
my @table_stringify = map { $_->stringify() } $squid_blocks;
chomp($table_stringify[0]);
# we remove unneeded whitespace and replace it with a newline
$table_stringify[0] =~ s/\s+\n/\n/g;
$table_stringify[0] =~ s/\n/\n /g;
#print $table_stringify[0]."\n";
#dump($table_stringify[0]);
#print "\n";
my @uniq_dom = split /\n /, $table_stringify[0];
chomp(@uniq_dom);
trim(@uniq_dom);
#dump(@uniq_dom);
print "$_\n" for uniq @uniq_dom;
print "\n";