#!/usr/local/bin/perl
# Active Expire -- Detecting actively accessed news groups and setting 
#		expire.ctl accordingly
$progname 	= 	"activexp";
$version 	= 	"Ver 0.04,  30 September 1996";
$author 	= 	"Yufan Hu <Y.Hu\@ulst.ac.uk>\n##\t\tPug Bainter <pug\@pug.net>";
#
# Slightly modified by Alan Brown (alan@manawatu.planet.co.nz) 15 Dec 1994
# Massively modified by Pug Bainter <pug@pug.net> 30 Sept 1996
#
require 'getopts.pl';
# Usage: run activexp everyday before expire starts. You can put it
#        in the crontab to start it before news.daily. It is essential to
#	run activexp before the news.notices file is moved to OLD and
#	gets compressed.
# Or:
#	activexp -l logfile
# for initialising the active group database
# The log file and expire.ctl file are assumed to be of INN 1.4
# Other options are:
#		  -d       turn on debugging
#		  -m	  mail results to news user
#		  -h	  do not keep a "hosts" log
#		  -o       override moderated newsgroup default check
#
# Copyright: Copyrighted by Yufan Hu <Y.Hu@ulst.ac.uk>, 1994
#
# Disclaim: Use it at your own risk.
#
############# Configuration starts here ########################
# The following should be changed according to your system setting up
# $LastRead:	the path name of the database to store the 
#		last access time of group
# $newslog:	the log file of innd & nnrpd (news.notice)
# $expire:	the expire.ctl file path name
# $hostslog:	log of hosts with their last accessing time
###############################################################

#$Prefix = "/news/" ;
$Prefix = "./" ;
$LogsPrefix = "/news/logs/" ;

$LastRead = $LogsPrefix . "LastRead.log";
$expire =   $Prefix . "expire.ctl";
$hostslog = $LogsPrefix . "Hosts.log";
$newslog =  $LogsPrefix . "news.notice";

# $default is the number of days for articles to be kept if the coresponding
# group is accessed (see expire.ctl for the meaning)
# NB. The patterns specified manually at the beginning of the expire.ctl
#	take precidence
# Keep 7 days for accessed groups
$default = 7;

# This is usually the entry in the second time field for A or U.
# (Ie. *:U:0:3:10)
$purge = 10;

# Binary groups usually eat up more disk space. $bingroups is a perl
# regexp of what we consider "binary" groups. This is actually any groups
# we want to expire at this rate. The expire rate for them should be set
# in the default section of the expire.ctl file.
$bingroups = "binaries|pictures|warez|crack|alt\.sex";

# Junk groups usually don't need to be kept at all. $junk is a perl
# regexp of what we consider junk groups. The expire rate for them
# should be set in the default section of the expire.ctl file.
$junk = "junk|control|test|flame";

# All the rest remain for this amount of time only. This is usually the
# entry in the second time field for A or U. (Ie. *:U:0:3:10)
$keeprest = 3;

# The accessed group database has to be established before proper expiring
# list being set up. $wait can be set to the number of days for collecting
# accessed groups from the log file and establish the database
# 
# You can cat together all the news.notices files stored in the OLD directory
# 	and run activexp once with '-l' option to collect the accessed groups
#	then set $wait to 1 to start automatical tunning the next day
# wait for 7 days before starting updating expire.ctl

$wait = 7;

# Flag to do host log info
$do_hosts = 1;

# Flag for moderated newsgroup check
$moderated = 0;

################# Debug Only #################################
$debug = 0;
#################################################################
# Anything below should not need to be changed

do Getopts('l:dhmo');

$newslog = $opt_l if ($opt_l);
$debug = 1 if ($opt_d);
$do_hosts = 0 if ($opt_h);
$moderated = 1 if ($opt_o);

if($debug){
	$warn .= "Logfile: $newslog\n";
}

$mail = "Mail -s Active-Groups news";
$expire_new = "$expire.new";

# Seconds in a day
$day = 3600 * 24;
$keeptime = $default * $day;
$wait = $wait * $day;

# open the database recording the time the group was last read
# access this database as an associated array
dbmopen(LASTREAD, $LastRead, 0600);

# open the news log file -- usually the news.notice
open(LOG, $newslog);

# get the current time
$now = time;

if($do_hosts) {

	# Initialising the hostslog array
	%hostslog = ();

	open(HOSTSLOG, $hostslog);
	while(<HOSTSLOG>){
		if(/^([^ ]+):\s+(.*)$/){
			$hostslog{$1} = $2;
		}
	}

	close(HOSTSLOG);
}

# scan through the log file, detecting any accessed groups within current day
# and store the current time in the database

while(<LOG>) {
	if(/^(.*\d\d:\d\d:\d\d).*nnrpd.*:\s+(.*)\s+group ([^\s]+)\s\d+/){
		$group = $3;
		if( $debug && (! $LASTREAD{$group}) ){
			$warn .= "Adding:  $group\n";
		}
		$LASTREAD{$group} = $now;
		$hostslog{$2} = "$1 $group";
	} elsif(/^(.*\d\d:\d\d:\d\d).*nnrpd.*:\s+(.*)\s+no_permission/){
		$hostslog{$2} = "$1 no_permission";
	}
}

close(LOG);

open(EXP, $expire);
open(EXP_NEW, ">$expire_new");

$start = "### Actively Accessed Groups Start ###";
$end = "### Actively Accessed Groups End ###";

# skip over everything in the begining of the expire.ctl file

@defined = ();

while(<EXP>){
	if(!/$start/){
		if(/^(.+):(.):(\d+|never):(\d+|never):(\d+|never)/){
			$group = $1;
			if($group eq '*'){
				if(($2 ne 'A' && !$moderated) || ($2 eq 'U' && $moderated)){
					if(($3 != 1 && $3 != 0) || $4 != $keeprest){
						print EXP_NEW "##$_";
						print EXP_NEW "*:$2:1:$keeprest:$5\n";
						next;
					} 
				}
			} else {
				@groups = split(/,/,$group);
				if(($4 >= $default) || ($4 eq "never")){
					if($group =~ /\./){
						$group =~ s/\./\\./g;
					}
					if($group =~ /[\*\+\?]/){
						$group =~ s/([\*\+\?])/.$1/g;
					}
					if ($debug) {
						print "@groups\n";
					}
					push(@defined, split(/,/,$group));
				} else {
					if ($debug) { print "skipping @groups define\n"; }
				}
			}
		}
		print EXP_NEW $_;
	} else { last; }
}

close(EXP);

# the automatic expire configuration starts here

($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($now);

$date = qx/date/;
chop($date);
$day = $wday;
if($date =~ /^([A-Z][a-z]+)\s/){
	$day = $1;
}

$start_token = "### ACTGROUP START TIME ###";
if( ! $LASTREAD{$start_token} ) {
	if ($debug) { print "Start time being set.\n"; }
	$LASTREAD{$start_token} = $now;
}

$start_time = $LASTREAD{$start_token};

require "ctime.pl";
chop($start_time_string = &ctime($start_time));

print EXP_NEW "$start
#############################################################
## Automatically generated by $progname ($version)
## Auther:	$author
## Date: 	$date 
## Started from:$start_time_string
##
## Do NOT change anything below this line. They are automatically
## generated by $progname
##
#############################################################
";

$total = 0;

# Scan over the database. If a group was accessed within the predefined
# keeptime, then change the expire time for that group to the keeptime

for $group ( sort (keys %LASTREAD) ) {
	($group eq $start_token) && next;
	# if some group is specifically defined, skip it
	$def = "NOT";
	for $d ( @defined ){
		if($group =~ /^$d/){
			if($debug){
				$d1 = $d;
				if($d1 =~ /\.[*+?]/){
					$d1 =~ s/\.([*+?])/$1/g;
				}
				$warn .= "Drop: $group, implied by $d1\n";
			}
			$def = "Yes";
		}
		if( $def eq "Yes" ) { last;}
	}
	if( $def eq "Yes" ) { 
		next;
	}

	$diff = $now - $LASTREAD{$group};
	if($diff <= $keeptime){
		if(($group !~ /$bingroups/) && ($group !~ /$junk/)) {
			print EXP_NEW "$group:A:0:$default:$purge\n";
		} else {
			if ($debug) { $warn .= "Drop: $group, binary or junk group\n"; }
		}
		$total++;
	} else {
		if($debug){
			$warn .= "Drop: $group, inactive for $default days\n";
		}
		delete $LASTREAD{$group};
	}
}

# get the number of entries in the database
$groups = (keys %LASTREAD) - 1;
print EXP_NEW "##  Total: $total/$groups groups processed\n";
print EXP_NEW "\n$end\n## Do not put anything after this line ###\n";

close(EXP_NEW); dbmclose(LASTREAD);

if($debug){
	$warn .= "##  Total: $total/$groups groups processed\n";
}

if ($opt_m) {
	# mail the result to the news owner
	system "$mail < $expire_new";
}

if($now - $start_time > $wait && ! $debug){
	system "mv $expire $expire.$day; mv $expire_new $expire";
} else {
	$warn .= "Collecting group information only, $expire intacted.\n";
	$warn .= "Expire list is in $expire.new\n";
}

if ($do_hosts) {
	# flush the hostslog file
	system "mv -f $hostslog $hostslog.$day";
	open(HOSTSLOG, ">$hostslog");

	$hosts = (keys %hostslog);

	if($debug){ $warn .= "\nHost access log: $hosts hosts\n\n";}
	foreach $host ( sort keys %hostslog ){
		printf HOSTSLOG "%-25s $hostslog{$host}\n", "$host:";
		if($debug){ 
			$warn .= sprintf("%-25s $hostslog{$host}\n", "$host:");
		}
	}
	$warn .= "\n";
}

if($warn){ print "\nMessage from $progname:\n$warn";}
exit;

########### Change Log ####################################
#
# 8/Aug/94:
#	Bug: reported by hagberg@mail.med.cornell.edu:
#		when something like '*binaries*' appears in the expire.ctl
#		actgroups will complain because it is not a legal regexp 
#		for perl
#	     fix: change the '*','+','?' appear in the pattern to '.*','.+'
#		and '.+' before compare with the group names.
# 11/Aug/94:
#	Add: Hosts.log file to log the hosts accessing our server and its
#		last access time
# 17/Aug/94:
#     Add: delete entries from the database if the groups are not active
#             for more than $default days
#
# 18/Aug/94:(Ver 0.03b,  18 August 1994)
#	Bug: reported by Tim Rosmus <tim@mdd.comm.mot.com>:
#		if the expire line contains never, that line was not got into
#		the @defined array. Fixed.
#	Add:	Three more options (patches from tim@mdd.comm.mot.com)
#		-d, -h, -m
# 22/Aug/94:
#	Bug: reported by Andrew Macpherson <A.Macpherson@bnr.co.uk>:
#		de.*:A:1:2:12 will cause demon.* to be dropped
# 23/Aug/94:
#	Add: $prefix for the name of directory of expire.ctl & others
#		Suggested: Andrew Macpherson <A.Macpherson@bnr.co.uk>
# 30/Aug/94:
#	Chg: suggested by Alan Brown <root@manawatu.planet.co.nz>
#		Do not touch the purge part of the line
#			*:A:1:1:45
#		so that things which have expire date set will stay longer
#		(such as FAQs).
#
# 15/Dec/94
#       Chg: (Alan Brown) <root@manawatu.planet.co.nz>
#              Do not touch the line
#                        *:A:0:0:0
#              while still processing
#                        *:M:1:1:45
#                        *:U:1:1:45
#              This allow INN to purge the contents of removed groups
#              immediately instead of having them expire slowly.
#
#
# 30/Sep/96
#	Chg: Pug <pug@pug.net>
#		The name of the script from actgroups.pl to activexp.pl.
#	Chg: Pug <pug@pug.net>
#		The behavior when checking the default sections of the
#		the old expire.ctl. Only add to the defined list if
#		the default keep is the same or larger than $default.
#		Check for 0 or 1 instead of just 1. (If somone wants
#		their message to expire that quick, so be it.)
#	Del: Pug <pug@pug.net>
#		$bindefault due to notes for $bingroups and $junk.
#	Add: Pug <pug@pug.net>
#		The $bingroups and $junk. Read notes there as well.
#	Add: Pug <pug@pug.net>
#		$LogsPrefix
#	Add: Pug <pug@pug.net>
#		More debugging statements.
#	Add: Pug <pug@pug.net>
#		-o flag to override moderated groups default check.
#		All this does is change the check in the definition
#		section from ne 'A' to eq 'U'.
#	Add: Pug <pug@pug.net>
#		Do the right thing with commas in the default section.
#	Add: Pug <pug@pug.net>
#		Add comments regarding purge and keeprest.