/* rexx_merge.cmd, takes bogofilter goodlist.db and spamlist.db
   and merges them into the single wordlist.db file

   Original procedure taken from merg.cmd by Yuri Dario
   which used the GNU awk, cat and sort utilities

   The old format was "'word' 'count' 'date'" for the spam and
   good lists.
   The new format is "'word' 'spam count' 'good count' 'date'"

   Rodney Pont <rpont@infohit.fsnet.co.uk>

*/

/* Load the RexxUtil function library (SysFileDelete is used further down) */
call RxFuncAdd 'SysLoadFuncs', 'RexxUtil', 'SysLoadFuncs'
call SysLoadFuncs

/* Dump each old-format database to a text file:
   spamlist.db -> spam.wds and goodlist.db -> good.wds */
ListKinds = 'spam good'
do KindNo = 1 to words(ListKinds)
   Kind = word(ListKinds, KindNo)
   'bogoutil -d' Kind || 'list.db >' Kind || '.wds'
end

LineCount = 0

/* Read the spam.wds dump (old format: word count date) and append one
   new-format line per entry to the NewLines. array.  A spam entry keeps
   its count in the first count column and gets 0 in the second.

   The file name is held as a quoted string on purpose: an unquoted
   spam.wds is a compound symbol whose default value is the upper-cased
   name SPAM.WDS, which only finds the file on a case-insensitive file
   system and changes meaning if the spam. stem is ever assigned. */
SpamFile = 'spam.wds'
do while lines(SpamFile) <> 0
   OldWordLine = linein(SpamFile)
   /* extract the values */
   parse value OldWordLine with OldWord OldCount OldDate
   /* ignore blank lines so no record without a word is written out */
   if OldWord = '' then iterate
   /* increment the array pointer */
   LineCount = LineCount + 1
   /* put the new-format line into the array */
   NewLines.LineCount = OldWord ' ' OldCount ' 0 ' OldDate
end
call stream SpamFile, "C", "CLOSE"

/* Read the good.wds dump (old format: word count date) and append one
   new-format line per entry to the NewLines. array, continuing from the
   current LineCount.  A good entry gets 0 in the first count column and
   keeps its count in the second.

   The file name is held as a quoted string on purpose: an unquoted
   good.wds is a compound symbol whose default value is the upper-cased
   name GOOD.WDS, which only finds the file on a case-insensitive file
   system and changes meaning if the good. stem is ever assigned. */
GoodFile = 'good.wds'
do while lines(GoodFile) <> 0
   OldWordLine = linein(GoodFile)
   /* extract the values */
   parse value OldWordLine with OldWord OldCount OldDate
   /* ignore blank lines so no record without a word is written out */
   if OldWord = '' then iterate
   /* increment the array pointer */
   LineCount = LineCount + 1
   /* put the new-format line into the array */
   NewLines.LineCount = OldWord ' 0 ' OldCount ' ' OldDate
end
call stream GoodFile, "C", "CLOSE"

/* Record the number of stored lines in element 0 of the stem */
NewLines.0 = LineCount

/* Remove any wordlist.txt / wordlist.db left over from an earlier run */
StaleFiles = 'wordlist.txt wordlist.db'
do FileNo = 1 to words(StaleFiles)
   Stale = word(StaleFiles, FileNo)
   if stream(Stale, "c", "query exist") <> "" then
      call sysFileDelete Stale
end

/* Write every collected new-format line to wordlist.txt */
OutFile = 'wordlist.txt'
call stream OutFile, "C", "OPEN WRITE"

do LineNo = 1 to NewLines.0
   call lineout OutFile, NewLines.LineNo
end

call stream OutFile, "C", "CLOSE"

/* Put the entries into order by round-tripping them through bogoutil:
   load the text file into a database, dump that database back over the
   text file, then rebuild the database from the fresh dump. */
DbFile  = 'wordlist.db'
TxtFile = 'wordlist.txt'

/* first load: text -> database */
'bogoutil -l' DbFile '<' TxtFile

/* replace the text file with a dump of the database */
if stream(TxtFile, "c", "query exist") <> "" then
   call sysFileDelete TxtFile
'bogoutil -d' DbFile '>' TxtFile

/* rebuild the database from the dumped text */
if stream(DbFile, "c", "query exist") <> "" then
   call sysFileDelete DbFile

'bogoutil -l' DbFile '<' TxtFile
