For analyzing Squid access logs I like to use Squidanalyzer (http://squidanalyzer.darold.net/). To make it easier to find, for example, naughty URLs and then block them, I wrote myself a Perl script. It works quite well, but it still has problems with certain corner cases (those aren't critical, though; typos in URLs, for example :o) ). The script gives a nice breakdown of which IP requested a URL, and how often, during the period in question. At the end you get regex suggestions that you can add to a suitable ACL file.
The easiest way is to create the script directly in the Squidanalyzer directory where the report files end up.
Note: mind data protection and your users' privacy!
Example usage:
./squidanalyzer-search.pl naughty-url 2016/03/users
Example output:
URL                             count  IPs
naughty-url.com                 1      192.168.125.125
naughty-url-something-else.net  1      192.168.123.123

Squid-Block-Regex
naughty\-url\.com$
naughty\-url\-something\-else\.net$
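The regex lines in the last block are meant for a Squid dstdom_regex ACL. A minimal squid.conf sketch, assuming the suggestions are collected in a file called /etc/squid/naughty-domains.acl (the file name and ACL name are made up; adapt them to your setup):

# /etc/squid/naughty-domains.acl contains one regex per line,
# e.g. naughty\-url\.com$
acl naughty_domains dstdom_regex -i "/etc/squid/naughty-domains.acl"
http_access deny naughty_domains

After editing the ACL file, squid -k reconfigure reloads the configuration without a full restart.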
Script – squidanalyzer-search.pl
#!/usr/bin/env perl
##############
## 2016-03-09
## Script by Clemens Steinkogler (clemens[at]steinkogler.org) - looking for naughty URLs ;o)
## Copyright (C) 2016 Clemens Steinkogler
##
## This program is free software; you can redistribute it and/or modify it under the terms of the
## GNU General Public License as published by the Free Software Foundation; either version 2 of
## the License, or (at your option) any later version. This program is distributed in the hope
## that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details. You should have received a copy of the GNU General Public License along with this program;
## if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
## MA 02111-1307 USA

use strict;
use warnings;
use Data::Dump qw(dump);
use List::MoreUtils qw(uniq);
use Text::Table;
use Text::Trim qw(trim);
use Mozilla::PublicSuffix qw(public_suffix);

# everything quick and dirty :o)
my $pattern = $ARGV[0];
my $path    = $ARGV[1];
chomp($pattern, $path);

my @user_urls;
my @user_urls_for_stats;
my $user_ip;
my %counts;
my $urls_stats = Text::Table->new("URL", "count", "IPs", "Squid-Block-Regex");
my $url;
my $squid_regex;
my %user_stats;
my $match = "";

my @files = <$path/*/*.html>;
#print dump(@files);

# we search every file we found
foreach my $file (@files) {
    # filename = ip of user
    $user_ip = "$file";
    $user_ip =~ s/.*\/(.*)\.html/$1/;
    #print $user_ip;

    open(my $fh, '<', $file) or die "Cannot open '$file': $!";

    # we are looking for the given pattern in the currently selected file
    while ( my $line = <$fh> ) {
        if ( $line =~ m/\<td\>\<a href.*\>(.*$pattern.*)\<\/a\>\<\/td\>/ ) {
            $url = $1;
            #if ( $url =~ m/www\.(.*)/ ) {
            #    $url = $1;
            #}
            push @user_urls_for_stats, $url;
        }
    }

    # we put the collected URLs as a value into the key for this IP
    if ( scalar @user_urls_for_stats >= 1 ) {
        @{$user_stats{$user_ip}{"urls"}} = @user_urls_for_stats;
        push @user_urls, @{$user_stats{$user_ip}{"urls"}};
    }

    undef @user_urls_for_stats;
    close $fh;
}

#dump(@user_urls);
#dump(%user_stats);

$counts{$_}++ for @user_urls;
#dump(\%counts);

# now create a table with: url - how often it was found across all users - the IPs that
# browsed that url - the properly formatted squid block regex
foreach my $most_browsed_url ( sort { $counts{$a} <=> $counts{$b} } keys %counts ) {
    foreach my $user_stats_key ( keys %user_stats ) {
        # \Q...\E so dots in the URL are matched literally
        my $tmp_match = grep { /^\Q$most_browsed_url\E$/ } @{$user_stats{$user_stats_key}{"urls"}};
        if ( $tmp_match >= 1 ) {
            $match = $match . $user_stats_key . "\n";
        }
    }

    my $tld = public_suffix($most_browsed_url)
        or warn "Invalid TLD found in '$most_browsed_url'";
    $most_browsed_url =~ /([^.]+)\.\Q$tld\E$/
        or warn "Couldn't find domain name in '$most_browsed_url'";
    my $domainname = $1;
    #print "Domain name is $domainname.$tld\n";
    my $most_browsed_domain = "$domainname.$tld";

    # escape dots and dashes and anchor the regex at the end of the domain
    $squid_regex = $most_browsed_domain;
    $squid_regex =~ s/\./\\./g;
    $squid_regex =~ s/-/\\-/g;
    $squid_regex = $squid_regex . "\$";

    $urls_stats->add($most_browsed_url, $counts{$most_browsed_url}, $match, $squid_regex);
    $match = "";
}

# print the first three columns (URL, count, IPs)
print $urls_stats->select(0, 1, 2) . "\n";

# here we select the fourth column
my $squid_blocks = $urls_stats->select(3);

# we stringify that column - this creates, in this case, an array with all column lines in one element
my @table_stringify = map { $_->stringify() } $squid_blocks;
chomp($table_stringify[0]);

# we remove unneeded whitespace and replace it with a newline
$table_stringify[0] =~ s/\s+\n/\n/g;
$table_stringify[0] =~ s/\n/\n /g;
#print $table_stringify[0] . "\n";

my @uniq_dom = split /\n /, $table_stringify[0];
chomp(@uniq_dom);
trim(@uniq_dom);
#dump(@uniq_dom);

print "$_\n" for uniq @uniq_dom;
print "\n";