#!/usr/bin/perl #--------------------------------------- # # Email Counter # Writen by MadHat (madhat@unspecific.com) # http://www.unspecific.com/count/ # # count.pl is to keep score on some of the mailing lists I have been # on for a while. What it does is count the emails, domains or suffixes # to tell how many emails, lines and new lines have been posted to the # list. It reads a standard mbox format files. # I have tested it with mutt, pine, evolution, and Eudora. # # # Copyright (c) 2001-2002, MadHat (madhat@unspecific.com) # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the distribution. # * Neither the name of MadHat Productions nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # #--------------------------------------- use Getopt::Std; use Date::Manip; use Mail::MboxParser; local ($opt_L, $opt_N, $opt_G); $VERSION = '2.4.15'; getopts('edsMhluELNGTHD:m:f:t:v'); if ($opt_D) { $opt_v = 1; } if ( !($opt_e xor $opt_d xor $opt_s xor $opt_M) or $opt_h ) { &usage } print "
\n" if ($opt_H);
$opt_t = 'today' unless ($opt_t);
$opt_f = 'Dec 31, 1969' unless ($opt_f);
print "$opt_f - $opt_t\n" if ($opt_v);
$date1 = &ParseDate("$opt_f 00:00:00");
$date2 = &ParseDate("$opt_t 23:59:59");
$mailbox = $ARGV[0];
if ($mailbox =~ /^http\:/) {
print "Using LWP to fect mailbox\n" if ($opt_v);
eval {
if ($mailbox =~ /gz$/) {
$tmpfile = "/tmp/.count" . time . ".gz";
} else {
$tmpfile = "/tmp/.count" . time;
}
use LWP::Simple;
mirror($mailbox, $tmpfile);
$tmpbox = 1;
$mailbox = $tmpfile;
}
}
if ( ! -e $mailbox ) {
print "There appears to be a problem. Mailbox not found\n";
exit;
}
if ($mailbox =~ /\.gz$/) {
print "Decompresing mailbox\n" if ($opt_v);
$gzip = `which gunzip 2>/dev/null`;
if ( !$gzip ) {
print "Unable to find gunzip to decompress file.\n" if ($opt_v);
$gzip = `which gzip 2>/dev/null`;
if ( !$gzip ) {
print "Unable to find gzip to decompress file.\n" if ($opt_v);
print "ERROR: Unable to decompress mailbox.\n";
exit;
}
}
chomp $gzip;
`$gzip -d $mailbox`;
$mailbox =~ s/\.gz$//;
}
print "Opening mailbox $mailbox\n" if ($opt_v);
$mbx = Mail::MboxParser->new($mailbox);
$mbx->make_index;
$msgc = 0;
print "Evaluating messages\n" if ($opt_v);
MESSAGE: for $msg ($mbx->get_messages) {
printf STDERR '-' x 72 . "\nMSG Num: %6.5d\nMSG Start Pos: %10.10d\n",
$msgc, $mbx->get_pos($msgc) . "\n" if ($opt_D);
my $lines = $new_lines = $html = $toppost = $quotes = $footer = $PGP = $PGPSig = 0;
$date = $msg->header->{'date'};
$date3 = &ParseDate($date);
$start = &Date_Cmp($date1,$date3);
$end = &Date_Cmp($date2,$date3);
print STDERR "Date_Cmp: $date1 < $date3 > $date2\n" if ($opt_D > 1);
print STDERR "Date_Cmp: $start <= 0 => $end\n" if ($opt_D > 1);
if ( $start <= 0 and $end >= 0) {
printf STDERR "Matched MSG Num: %6.5d\n", $msgc if ($opt_D);
if ($opt_D > 4) {
$headers = $msg->header;
for $head (sort keys %$headers) {
if (@{$msg->header->{$head}}) {
for $value (@{$msg->header->{$head}}) {
print STDERR "HEADER:::$head => $value\n"
}
} else {
print STDERR "HEADER::$head => " . $msg->header->{$head} . "\n"
}
}
print STDERR "-_" x 30 . "\n"
}
my $msgid = $msg->header->{'message-id'};
$msgid =~ s/[\<\>]//g;
my $to = $msg->header->{'to'};
$to =~ s/^[\s\S]*\<([\w\d\-\$\+\.\@\%\]\[]+)\>.*$/$1/;
$to =~ /^([\w\.\-]+)\@.*\.\w+$/;
my $sto = $1;
print STDERR "To: $to ($sto)\n" if ($opt_D > 1);
my $sub = $msg->header->{'subject'};
if ($opt_l and ($sub eq '(no subject)' or $sub eq '') ) { $html++; $bad++ }
print STDERR "Subject: $sub\n" if ($opt_D > 1);
my $references = $msg->header->{'references'};
$references =~ s/\s*//g;
$references =~ s/\<([\w\d\-\$\.\@\%\]\[]+)\>/$1\n/g;
my $replyto = $msg->header->{'in-reply-to'};
$replyto =~ s/\s*//g;
$replyto =~ s/^\s*\<([\w\d\-\$\.\@\%\]\[]+)\>.*$/$1/;
print STDERR "Message-ID: $msgid\n" if ($opt_D > 1);
print STDERR "In-Reply-To: $replyto\n" if ($opt_D > 1 and $replyto);
print STDERR "References: $references\n" if ($opt_D > 1 and $references);
if ( $msgid{$msgid} ) {
print STDERR "Duplicate Message: $msgid\n" if ($opt_D);
next;
} else {
$msgid{$msgid}++;
}
$count2++;
$email = $msg->from->{'email'};
print STDERR "From: $email\n" if ($opt_D > 1);
$email =~ tr/[A-Z]/[a-z]/;
$email =~ s/[\<\>]//g;
$email =~ s/ \(.+\)$//g;
$email =~ s/^\".+\"//;
if ($opt_e) {
$who = $email;
} elsif ($opt_d) {
$email =~ /^[\w\.\-]+\@(.*\.\w+)$/;
$who = $1;
} elsif ($opt_s) {
$email =~ /^[\w\.\-]+\@.*(\.\w+)$/;
$who = $1;
}
if ($opt_M) {
$Mcounter++;
my $mailer;
if ($mailer = $msg->header->{'x-mailer'}) {
print STDERR "X-Mailer: $mailer\n" if ($opt_D > 1);
} elsif ($mailer = $msg->header->{'x-mimeole'}) {
print STDERR "User-Agent: $mailer\n" if ($opt_D > 1);
} elsif ($mailer = $msg->header->{'user-agent'}) {
print STDERR "User-Agent: $mailer\n" if ($opt_D > 1);
} elsif ($msgid =~ /\@mail\.gmail\.com$/) {
$mailer{'GMail (Google)'}++;
print STDERR "MSGID $msgid -> Google GMail\n" if ($opt_D > 1);
} elsif ($msgid =~ /^Pine\.LNX\.\d\.\d{1,3}/) {
$mailer{'Pine (Linux)'}++;
print STDERR "MSGID $msgid -> Pine (Linux)\n" if ($opt_D > 1);
} elsif ($msgid =~ /^Pine\.CYG\.\d\.\d{1,3}/) {
$mailer{'Pine (Windows)'}++;
print STDERR "MSGID $msgid -> Pine (Windows)\n" if ($opt_D > 1);
} elsif ($msgid =~ /^Pine\.CYG\.\d\.\d{1,3}/) {
$mailer{'Pine (Cygwin)'}++;
print STDERR "MSGID $msgid -> Pine (Cygwin)\n" if ($opt_D > 1);
} elsif ($msgid =~ /^Pine\.BSF\.\d\.\d{1,3}/) {
$mailer{'Pine (FreeBSD)'}++;
print STDERR "MSGID $msgid -> Pine (FreeBSD)\n" if ($opt_D > 1);
} elsif ($msgid =~ /^Pine\.BSO\.\d\.\d{1,3}/) {
$mailer{'Pine (OpenBSD)'}++;
print STDERR "MSGID $msgid -> Pine (OpenBSD)\n" if ($opt_D > 1);
} elsif ($msgid =~ /^Pine\.BSD\.\d\.\d{1,3}/) {
$mailer{'Pine (BSD)'}++;
print STDERR "MSGID $msgid -> Pine (BSD)\n" if ($opt_D > 1);
} elsif ($msgid =~ /^Pine\.SUN\.\d\.\d{1,3}/) {
$mailer{'Pine (SunOS)'}++;
print STDERR "MSGID $msgid -> Pine (SunOS)\n" if ($opt_D > 1);
} elsif ($msgid =~ /^Pine\.SOL\.\d\.\d{1,3}/) {
$mailer{'Pine (Solaris)'}++;
print STDERR "MSGID $msgid -> Pine (Solaris)\n" if ($opt_D > 1);
} elsif ($msgid =~ /^Pine\.NEB\.\d\.\d{1,3}/) {
$mailer{'Pine (NetBSD)'}++;
print STDERR "MSGID $msgid -> Pine (NetBSD)\n" if ($opt_D > 1);
} elsif ($msgid =~ /^Pine\.BSI\.\d\.\d{1,3}/) {
$mailer{'Pine (BSDi)'}++;
print STDERR "MSGID $msgid -> Pine (BSDI)\n" if ($opt_D > 1);
} elsif ($msgid =~ /^Pine\.\w{3}\.\d\.\d{1,3}/) {
$mailer{'Pine'}++;
print STDERR "MSGID $msgid -> Pine (Other)\n" if ($opt_D > 1);
} elsif ($msgid =~ /\@webbox\.com/) {
$mailer{'WebBox'}++;
print STDERR "MSGID $msgid -> WebBox\n" if ($opt_D > 1);
} elsif ($msgid =~ /\@onebox\.com/) {
$mailer{'OneBox'}++;
print STDERR "MSGID $msgid -> OneBox\n" if ($opt_D > 1);
} elsif ($msgid =~ /yahoo\.com/) {
$mailer{'Yahoo'}++;
print STDERR "MSGID $msgid -> Yahoo\n" if ($opt_D > 1);
} elsif ($msgid =~ /hotmail\.com/) {
$mailer{'HotMail'}++;
print STDERR "MSGID $msgid -> HotMail\n" if ($opt_D > 1);
} elsif ($msgid =~ /hushmail\.com/) {
$mailer{'HushMail'}++;
print STDERR "MSGID $msgid -> HushMail\n" if ($opt_D > 1);
} else {
$mailer{'UNKNOWN'}++;
print STDERR "MSGID $msgid -> Unknown\n" if ($opt_D);
}
if ($mailer) {
&init();
for $agent (keys %mailer_agent) {
if ($mailer =~ /^$agent/) {
print STDERR "$mailer -> $agent -> $mailer_agent{$agent}\n"
if ($opt_D > 1);
$mailer{$mailer_agent{$agent}}++;
next MESSAGE;
}
}
print STDERR "No Match: $mailer\n" if ($opt_D);
} else {
# print STDERR "No Mailer Found\n" if ($opt_D);
}
next MESSAGE;
}
if (!$who) {
print STDERR "Unable to find _who_\n" if ($opt_D > 1);
print STDERR '-' x 75 . "\n" if ($opt_D);
$msgc++;
next MESSAGE;
} else {
print STDERR "Matched: $who\n" if ($opt_D > 1);
}
if (
$msg->header->{'x-originating-ip'}
and
$track{$msg->header->{'x-originating-ip'}}
and
$track{$msg->header->{'x-originating-ip'}} ne $who
) {
print STDERR "TRACE::" . $track{$msg->header->{'x-originating-ip'}}
. " and $who using " . $msg->header->{'x-originating-ip'} . "\n"
if ($opt_D > 4);
} elsif ($msg->header->{'x-originating-ip'}) {
$track{$msg->header->{'x-originating-ip'}} = $who;
print STDERR "IP::" . $msg->header->{'x-originating-ip'} . " => $who\n"
if ($opt_D > 4);
}
my $body = $msg->body($msg->find_body);
@msg_body = $body->as_lines;
if ($msg->is_multipart) {
@parts = $msg->parts;
if (@parts and $opt_l and !$html) {
print STDERR "Message $count2 has multiple parts\n" if ($opt_D);
print STDERR "Testing message $count2 for 'bad' parts\n" if ($opt_D);
my $i;
PARTS: for $i (0..$#parts) {
my $part_type = $parts[$i]->effective_type;
if ($part_type =~ /^text\/html|enriched/i
or $part_type =~ /^image|audio|application\/[^pgp-]/i
) {
print STDERR "MIME-Type: $part_type (*co*loser*ugh*)\n" if ($opt_D > 1);
$html++;
$bad++;
last PARTS;
} else {
print STDERR "MIME-Type: $part_type looks ok\n" if ($opt_D > 1);
}
}
}
}
LINE: for (@msg_body) {
next LINE if ( m/^$/ );
# Need to check for footers and Sigs and stuff
if (/^__________________________________________________$/) {
print STDERR "Possible Footer\n" if ($opt_D > 1);
$footer++;
} elsif ($footer and /^Do You Yahoo!\?$/) {
print STDERR "Yep, its a Yahoo footer, Skipping to next Message\n"
if ($opt_D > 1);
next MESSAGE;
} elsif (/^-----BEGIN PGP SIGNED MESSAGE-----$/) {
print STDERR "PGP Signed Message\n" if ($opt_D > 1);
print STDERR "PGP::$who $_" if ($opt_D > 4);
$PGP++;
next LINE;
} elsif ($PGP and /^Hash: (\w+)$/) {
print STDERR "PGP Hash Type ($1)\n" if ($opt_D > 1);
print STDERR "PGP::$who $_" if ($opt_D > 4);
next LINE;
} elsif (/^-----BEGIN PGP SIGNATURE-----$/) {
print STDERR "Begin PGP Signature\n" if ($opt_D > 1);
print STDERR "PGP::$who $_" if ($opt_D > 4);
$PGPsig++;
next LINE;
} elsif ($PGPsig and ! /^-----END PGP SIGNATURE-----$/) {
print STDERR "PGP::$who $_" if ($opt_D > 4);
next LINE;
} elsif ($PGPsig and /^-----END PGP SIGNATURE-----$/) {
print STDERR "END PGP Signature\n" if ($opt_D > 1);
print STDERR "PGP::$who $_" if ($opt_D > 4);
$PGPsig--;
next LINE;
}
#####################################
$lines++;
if ( ! m/^[ \t]*$|^[ \t]*[>:]|^\.\s\:\s/ ) {
$new_lines++;
if ($new_lines > 1 and !$quotes) {
$toppost++;
} elsif ($quotes and $toppost) {
$toppost = 0;
}
if ($opt_u) {
if (/(https?\:\/\/\S+)/) {
my $site = $1;
chomp $site;
$site =~ s/[\>\.\)]*$//;
$sub =~ s/^re\:\s//i;
$sub =~ s/^\[[\w\-\d\:]+\]\s//i;
if ($site =~ /(yahoo|msn|hotjobs|hotmail|your\-name|pgp|excite)\.com\/?$/
or $site =~ /(promo|click|docs)\.yahoo\.com/
or $site =~ /(join|explorer|messenger|mobile)\.msn\.com/
or $site =~ /mailman\/listinfo\/$sto$/
or $skipped{$site}) {
$skipped{$site}++;
print STDERR "Skipping $site ($skipped{$site})\n"
. " - from message '$sub'\n - from $who\n\n"
if ($opt_D > 1);
} else {
if (!$urls{$site}) {
print STDERR "Adding $site\n from message '$sub'\n from $who\n\n"
if ($opt_D > 1);
$contrib{$who}++;
$urls{$site} = $sub;
push @{$url_list{$sub}}, $site;
} else {
print STDERR "Skipping (duplicate) $site\n from message '$sub'\n from $who\n\n"
if ($opt_D > 1);
}
}
}
}
print STDERR "NEW($new_lines, $lines) $_" if ($opt_D > 2);
} else {
print STDERR "QUOT($lines) $_" if ($opt_D > 2);
$quotes++;
}
}
$tracker{$msgid} = $who;
if (@{$unordered{$msgid}}) {
my %counted = ();
print STDERR "Matched MSGID to Previous Reference\n"
if ( $opt_D > 1 );
REF: for my $ordered (@{$unordered{$msgid}}) {
if ($counted{$ordered}) {
print STDERR " `-Already Incremented for $who ($ordered)\n"
if ( $opt_D > 1 );
next REF;
} else {
$counted{$ordered}++;
$replyto{$who}++;
print STDERR " `-Incrimenting $who Troll Rating ($ordered)\n"
if ( $opt_D > 1 );
}
}
}
if ($replyto and $tracker{$replyto}) {
$replyto{$tracker{$replyto}}++;
print STDERR "Replying to: $tracker{$replyto} ($replyto{$tracker{$replyto}})\n"
if ( $opt_D > 1 );
} elsif ($replyto) {
push @{$unordered{$replyto}}, $msgid;
print STDERR "Replying to: Unknown Reference\n"
if ( $opt_D > 1 );
}
if ($references) {
my $rmsgidc = 1;
RMSGID: foreach my $rmsgid ( split("\n", $references) ) {
next RMSGID unless ( $rmsgid );
print STDERR "Reference MSGID ($rmsgidc): $rmsgid\n"
if ($opt_D > 1);
if ($rmsgid ne $replyto and $tracker{$rmsgid}) {
$replyto{$tracker{$rmsgid}}++;
print STDERR "Referencing ($rmsgidc): $tracker{$rmsgid} ($replyto{$tracker{$rmsgid}})\n"
if ( $opt_D > 1 );
} elsif ($tracker{$rmsgid}) {
print STDERR "Referenced In-Reply-To Duplicate ($rmsgidc): $tracker{$rmsgid} ($replyto{$tracker{$rmsgid}})\n"
if ( $opt_D > 1 );
} else {
push @{$unordered{$rmsgid}}, $msgid;
print STDERR "Referencing ($rmsgidc): Unknown Reference\n"
if ($opt_D > 1);
}
$rmsgidc++;
}
}
if ($new_lines * 10 < $lines - $new_lines and !$html) {
$html++;
$bad++;
print STDERR "$sub) New lines ($new_lines) is less that 10% of quoted lines("
. ($lines - $new_lines) . ") by $who\n" if ($opt_D);
} elsif (!$html and $toppost and $quotes) {
$html++;
$bad++;
print STDERR "$sub) Top Post from $who\n" if ($opt_D);
}
for my $line ($body->signature) {
print STDERR "SIG::$who => $line\n" if ($line !~ /^\s*$/ and $opt_D > 2);
}
$count{$who}++;
$lines{$who} += $lines;
$new_lines{$who} += $new_lines;
$html{$who} += $html;
$counter++;
if ($html{$who} > $count{$who}) {
die "ERROR: Bad Mails outnumbers Total Mails
$who: $html{$who} > $count{$who}
This should NEVER happen.\n"
}
}
print STDERR '-' x 75 . "\n" if ($opt_D);
$msgc++;
}
print "Removing temporary mailbox\n" if ($opt_v and $tmpbox);
unlink($mailbox) if ($tmpbox);
if ($opt_M) {
print "Start Date: " . UnixDate($date1, "%b %e, %Y") . "\n"
if ($opt_f);
print "End Date: " . UnixDate($date2, "%b %e, %Y") . "\n"
if ($opt_f or $opt_t);
@keys = sort {
$mailer{$b} <=> $mailer{$a}
} keys %mailer;
$count = @keys;
print "EMails Found: $Mcounter\n";
print "Unique Agents: $count\n\n";
print " # % Client\n";
for my $client (@keys ) {
my $perc = sprintf("%.1f", $mailer{$client}/$Mcounter*100);
print swrite(<<'END', $mailer{$client}, $perc, $client);
@>>> @>>> @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
END
}
exit;
}
if ( $opt_L ) {
print "Sorting by total number of lines sent\n" if ($opt_v);
@keys = sort {
$lines{$b} <=> $lines{$a} || $a cmp $b
} keys %lines;
} elsif ( $opt_N ) {
print "Sorting by total number of new lines sent\n" if ($opt_v);
@keys = sort {
$new_lines{$b} <=> $new_lines{$a} || length($b) <=> length($a) || $a cmp $b
} keys %new_lines;
} elsif ( $opt_G ) {
print "Sorting by total number of noise sent\n" if ($opt_v);
@keys = sort {
($lines{$a} / $new_lines{$a}) <=> ($lines{$b} / $new_lines{$b})
} keys %count;
} else {
print "Sorting by total number of emails sent\n" if ($opt_v);
@keys = sort {
$count{$b} <=> $count{$a} || length($b) <=> length($a) || $a cmp $b
} keys %count;
}
&load_formats;
die $@ if $@;
print '-' x 75 . "\n" if ($opt_v);
print "count v$VERSION by MadHat(at)Unspecific.com - [[|%^)
http://www.unspecific.com/.go/count/
--\n\n"
if ($opt_v);
print "Total emails checked: $count2\n" if ($opt_v);
print "Start Date: " . UnixDate($date1, "%b %e, %Y") . "\n"
if ($opt_f);
print "End Date: " . UnixDate($date2, "%b %e, %Y") . "\n"
if ($opt_f or $opt_t);
print "Total emails matched: $counter\n"; # if ($counter != $count2);
print "Total emails from losers: $bad\n" if ($bad and $opt_l);
$number = keys %count;
print "Total Unique Entries: $number\n";
$max_count = $opt_m?$opt_m:50;
for $id (@keys) {
$perc = $loser = 0;
$replyto{$id} = $replyto{$id}?$replyto{$id}:'0';
$current_number++;
last if ($current_number > $max_count);
$perc = $new_lines{$id} / $lines{$id} * 100 if ($lines{$id});
$loser = $html{$id} / $count{$id} * 100 if ($html{$id} > 0);
write;
}
if ($opt_u) {
print "\n--\n\n";
print "Contributers URLs\n";
print "------------ ----\n";
for (sort {$contrib{$b} <=> $contrib{$a}} keys %contrib) {
$contribc++;
printf "%2d) %-35s %3d\n", $contribc, $_, $contrib{$_};
}
print "\nURLs Found\n-------------\n";
for (sort keys %url_list) {
print "$_\n";
for $URL (@{$url_list{$_}}) {
print " $URL\n";
}
print "\n";
}
}
print "\n" if ($opt_H);
0;
#---------------------------------------
sub usage {
print "count - $VERSION - The email counter by: MadHat