#! e:/usr/bin/perl.exe

=pod

Name:
bogoupgrade -- upgrade a bogofilter database to current version.

Author:
Gyepi Sam <gyepi@praxis-sw.com>
David Relson <relson@osagesoftware.com>

=cut

# bogofilter-0.3 through bogofilter-0.6.3
#
#	HEADER "# bogofilter wordlist (format version A): %lu"
#	(the first-line signature this script matches for format A)
#

# bogofilter-0.7.x
#
#	HEADER "# bogofilter email-count (format version B): %lu"
#

# bogofilter-0.8 to bogofilter-0.13.x
#
# BerkeleyDB with double wordlists 
#	spamlist.db and goodlist.db

# bogofilter-0.14 and later
#
# BerkeleyDB with single wordlist
#	wordlist.db

require 5.005_03;
use strict;

# Forward declarations: the converters are called by the main program
# before their definitions appear further down the file.
sub convert_double_to_single();
sub convert_format_A();
sub convert_format_B();

my $VERSION = '0.4.2';

# Settings filled in from the command line and read by the converter subs:
#   $in, $out         input text file (-i) and output database file (-o)
#   $dir              directory holding the databases to merge (-d)
#   $yday             "-y <value>" fragment spliced into bogoutil command
#                     lines; stays empty when -y is not given
#   $msg_count_token  token name written next to the message count
#   $help             declared, but not referenced by any code visible here
my ($in, $out, $help, $dir, $yday, $msg_count_token);
$yday = '';

# Program used to dump and load word lists; -b replaces it.
my $bogoutil = 'bogoutil';

# Options that consume the following command-line word as their value.
my %takes_value = map { ($_ => 1) } ('-d', '-i', '-o', '-b', '-y');

for (my $i = 0; $i < @ARGV; $i++){
  my $arg = $ARGV[$i];

  if ($arg eq '-h' or $arg eq '--help'){
    help();
    exit(0);
  }

  # Reject unknown options, and value-taking options that are the last
  # word on the command line (their value would silently become undef).
  unless ($takes_value{$arg} and $i < $#ARGV){
    usage();
    exit(1);
  }

  my $value = $ARGV[++$i];

  if ($arg eq '-d'){
    $dir = $value;
  } elsif ($arg eq '-i'){
    $in = $value;
  } elsif ($arg eq '-o'){
    $out = $value;
  } elsif ($arg eq '-b'){
    $bogoutil = $value;
  } else {
    # -y: keep the option letter so the string can be dropped straight
    # into a bogoutil command line.
    $yday = "-y $value";
  }
}

# Database file extension; the DB_EXT environment variable overrides 'db'.
my $db_ext = $ENV{DB_EXT} ||  'db';

# Main dispatch.  With -d the two separate word lists are merged into one;
# otherwise the first line of the -i text file is matched against the known
# signatures to choose the format A or format B converter.
if ( $dir ) {
    convert_double_to_single();
} else {
    die "Missing input filename\n" unless $in;
    die "Missing output filename\n" unless $out;

    # Assign the file-scoped variable rather than declaring a new "my" here:
    # the converter subs refer to the file-scoped one, and a block-local
    # copy would leave them printing an empty token name.
    $msg_count_token = '.MSG_COUNT';

    open(F, "< $in") or die "Cannot open input file [$in]. $!.\n";
    my $sig = <F>;
    $sig = '' unless defined $sig;    # empty file: reported as unrecognized below
    chomp($sig);

    # The converters take no arguments; they read the message count from $1,
    # so nothing that runs a regex may come between a match and its call.
    if ($sig =~ m/^\# bogofilter wordlist \(format version A\):\s(\d+)$/){
        convert_format_A();
    }
    elsif ($sig =~ m/^\# bogofilter email-count \(format version B\):\s(\d+)/){
        convert_format_B();
    }
    else {
        # Replace every run of characters outside \040-\177 with a single
        # "_" so the diagnostic stays printable.
        $sig =~ y/[\040-\177]/_/cs;
        warn "Cannot recognize signature [$sig].\n";
        exit(2);
    }
}

# Convert a format A text file.  The remaining lines of the file already
# open on F are piped unchanged into "bogoutil -l", followed by one final
# record made of the message-count token and the count taken from $1 (the
# caller's signature match).  Dies if the bogoutil pipe cannot be started
# or does not close cleanly.
sub convert_format_A() {
  my $count = $1;    # read first: any later regex would overwrite $1
  my $loader = "$bogoutil $yday -l $out";

  open(OUT, "| $loader") or die "Cannot run command \"$loader\": $!\n";

  # Straight copy of the word list records.
  print OUT $_ while <F>;

  print OUT "$msg_count_token $count\n";

  close(OUT) or die "Error executing command \"$loader\": $!\n";
  close(F);
}

# Convert a format B installation.  The -i text file only supplies the
# message count (taken from $1, the caller's signature match).  The word
# list is dumped with "bogoutil -d" from the database file whose name is
# the input name with a trailing "count" replaced by the database
# extension, and piped into "bogoutil -l" followed by a final
# message-count record.  A count found inside the dump replaces the one
# from the text file.
# Exits with status 2 when the database file is missing; dies when either
# bogoutil pipe cannot be started or does not close cleanly.
sub convert_format_B() {
  my $msg_count = $1;    # read first: any later regex would overwrite $1
  my $in_db = $in;

  $in_db =~ s/count$/$db_ext/;

  unless (-f $in_db){
    warn("Cannot find database file [$in_db]\n  corresponding to input file [$in]\n");
    exit(2);    # a bare exit would report success to the caller
  }

  my $load_cmd = "$bogoutil $yday -l $out";
  open(OUT, "| $load_cmd") or die "Cannot run command \"$load_cmd\": $!\n";

  # The text file has served its purpose; reuse F for the database dump.
  close(F);
  my $dump_cmd = "$bogoutil $yday -d $in_db";
  open(F, "$dump_cmd |") or die "Cannot run command \"$dump_cmd\": $!\n";

  while(<F>){
    if (m/^\.count\s+(\d+)$/){
      warn("Found a message count of [$1] in db.\nThrowing away text file count of [$msg_count]\n");
      $msg_count = $1;
      next;
    }
    # \Q..\E: the token is data, not a pattern (its leading "." must match
    # a literal dot only).
    elsif (/^\Q$msg_count_token\E\s(\d+)$/){
      warn("This database appears to have been upgraded already.\nBut there's no harm in doing it again.\n");
      $msg_count = $1;
      next;
    }
    print OUT $_;
  }
  # A failed dump must not pass for a complete word list.
  close(F) or die "Error executing command \"$dump_cmd\": $!\n";

  print OUT "$msg_count_token $msg_count\n";

  # Same check as convert_format_A: load errors surface when the pipe closes.
  close(OUT) or die "Error executing command \"$load_cmd\": $!\n";
}

# Dump one word list with "bogoutil -d" and reformat every record.
# args: filename format
#   filename  database file handed to "bogoutil -d"
#   format    sprintf template applied to the first three
#             whitespace-separated fields of each dumped line
# returns: list of formatted records, one per non-blank dumped line
# dies: when not called in list context, when bogoutil cannot be started,
#       or when its pipe does not close cleanly
sub cvt2to1_get($$) {
    my ($filename, $format) = @_;
    my @tmp;
    my $cmd;

    die "need array context in cvt2to1_get" unless wantarray;
    $cmd = "$bogoutil -d $filename";
    open(I, "$cmd |") or die "cannot run \"$cmd\": $!";
    while (<I>) {
        chomp;
        my @a = split;
        # A blank line has no fields; formatting it would fabricate a
        # record with an empty token and zero counts.
        next unless @a;
        push @tmp, sprintf($format, $a[0], $a[1], $a[2]);
    }
    close I or die "error running \"$cmd\": $!";
    return @tmp;
}

# Merge the separate spamlist and goodlist databases found in $dir into a
# single wordlist database.  Each list is dumped through cvt2to1_get(): the
# spam list's count lands in the second field and the good list's in the
# third, with 0 in the other one.  The combined records are sorted and
# piped to "bogoutil -l".
# Dies if the wordlist file already exists or a bogoutil run fails.
sub convert_double_to_single() {
    my $target = "$dir/wordlist.$db_ext";
    die "$target already exists." if -e $target;

    # Spam records are fetched first, then good ones; the sort below makes
    # the final order independent of that.
    my @records = (
        cvt2to1_get("$dir/spamlist.$db_ext", "%s %d 0 %d"),
        cvt2to1_get("$dir/goodlist.$db_ext", "%s 0 %d %d"),
    );

    my $loader = "$bogoutil -l $target";
    open(O, "| $loader") or die "cannot run \"$loader\": $!";

    my $payload = join("\n", sort @records) . "\n";
    print O $payload;

    close O or die "error executing \"$loader\": $!";
}

# Normal end of the main program; only sub definitions follow.
exit(0);

# Print a short synopsis of the command line on STDERR.
# Does not exit; the caller chooses the exit status.
sub usage {
  # -y is listed inside the -i/-o group because only the text-file
  # conversions pass it on to bogoutil.
  warn "usage: $0 [ -d <bogofilter directory> ]\n"
     . "  [ -i <input text file> -o <output db file> [ -b <path to bogoutil>] [ -y <date> ] ]\n"
     . "  [ -h ]\n";
}

# Print the full option reference on STDOUT, then terminate the program
# with exit status 0.
sub help {
  print <<EOF;
$0 -- upgrades bogofilter database to current version.
Options:
  -d <directory>

    Name of directory containing database files.  Old files will be read and new
    files will be written.

  -i <input file>

    Text file containing message count, and possibly wordlist data records.  If
    the file only contains a message count but no word list records, there must
    be a database file, in the same directory as the text file, which contains
    the word list data.

 -o <output file>

    Output database file.  Use the appropriate file extension for your version
    of bogofilter, i.e. '.db' for Berkeley DB or '.sql' for SQLite3.

 -b <path to bogoutil program>

    Defaults to 'bogoutil', in the hopes that your shell will find it.

 -y <date>

    Passed unchanged to bogoutil as its -y option when converting with -i/-o.
    See bogoutil(1) for the accepted format.

 -h     help

    You are reading it.
EOF
  exit(0);
}

# vim: set filetype=perl ai:
