#!@BASH@
#
# gen-collect 1.1
#
# Copyright (c) 1998-1999 The ht://Dig Group
# Distributed under the terms of the GNU General Public License (GPL)
# version 2 or later.
# for the ht://Dig search system http://www.htdig.org/
# and the multidig script system http://www.htdig.org/contrib/scripts/
#
# Part of the "multidig script system"
# a system of shell scripts and some modified conf files
# that makes dealing with multiple databases easier for ht://Dig
#
# Syntax:
# gen-collect [-v]
#
# Merges multiple databases into ``collected'' db
# (This is done by multidig too, but this script lets you *just*
# generate the collections.)
#
# Optional debugging aid: when the first argument is exactly -v, keep the
# flag in $verbose so it can be handed on to the htmerge invocations below.
case "$1" in
  -v) verbose=-v ;;
esac
# You may need to set the following:
# NOTE(review): @CONFIG_DIR@ looks like a placeholder that is substituted
# when the script is installed -- confirm against the build system.
MULTIDIG_CONF=@CONFIG_DIR@/multidig.conf

# The rest of the script uses COLLECT_LIST, CONFIG_DIR, DB_BASE, BINDIR,
# REPORT and BACKUPS without defining them, so they are presumably provided
# by this file.  Stop right away if it cannot be read instead of carrying on
# with all of them empty.
if [ ! -r "$MULTIDIG_CONF" ]; then
  echo "gen-collect: cannot read configuration file $MULTIDIG_CONF" >&2
  exit 1
fi
source "$MULTIDIG_CONF"
# We may be called inside multidig, so we don't want to mess with the report:
# htmerge output is only ever appended (>>) to $REPORT.

# Remember whether the script itself was started with -v.  Inside the
# functions below "$1" is the function's own first argument, so the progress
# messages cannot test the script's "$1" directly.
gen_collect_chatty=false
if [ "$1" = "-v" ]; then
  gen_collect_chatty=true
fi

# gen_collect_note WORD...
# Echo a progress message, but only when the script was started with -v.
gen_collect_note() {
  if [ "$gen_collect_chatty" = "true" ]; then
    echo "$@"
  fi
}

# gen_collect_build COLLECTION
# Regenerate one collected database from the member databases listed in
# $DB_BASE/COLLECTION/COLLECTION.collect.
# Globals read: CONFIG_DIR, DB_BASE, BINDIR, REPORT, BACKUPS, verbose
# Returns:      non-zero (and touches nothing) when the member list is
#               unreadable or names no database.
gen_collect_build() {
  local collect=$1
  # What's the conf file for this database?
  local conf=$CONFIG_DIR/$collect.conf
  local dest=$DB_BASE/$collect
  local members=$dest/$collect.collect
  local firstdb db merge_conf file

  # $(date) is left unquoted so the timestamp is printed exactly as before.
  echo "Generating $collect at:" $(date)

  # Without a member list there is nothing to merge; bail out rather than
  # run htmerge against whatever .work files happen to be lying around.
  if [ ! -r "$members" ]; then
    echo "gen-collect: cannot read $members, skipping $collect" >&2
    return 1
  fi
  firstdb=$(head -n 1 "$members")
  if [ -z "$firstdb" ]; then
    echo "gen-collect: $members names no database, skipping $collect" >&2
    return 1
  fi

  # We want to replace the old .work files with the first database.
  # This ensures that we *only* get documents from the merged db
  # and not old ones left around in our previous collected db.
  cp "$DB_BASE/$firstdb/db.docdb" "$dest/db.docdb.work"
  cp "$DB_BASE/$firstdb/db.docs.index" "$dest/db.docs.index.work"
  cp "$DB_BASE/$firstdb/db.wordlist.work" "$dest/db.wordlist.work"
  cp "$DB_BASE/$firstdb/db.words.db" "$dest/db.words.db.work"

  # Merge in every remaining member.  "tail -n +2" prints everything from
  # the second line on, so the last entry is not lost when the list file
  # has no trailing newline (counting lines with wc -l would miss it).
  # The list is word-split on purpose, as it always has been.
  for db in $(tail -n +2 "$members"); do
    gen_collect_note "Merging db $db of collect $collect"
    merge_conf=$CONFIG_DIR/$db.conf
    # There's a slight bug in the merge function.
    # It's looking for db.wordlist, not .work. So let's copy it temporarily.
    cp "$DB_BASE/$db/db.wordlist.work" "$DB_BASE/$db/db.wordlist"
    # Do the merging, using -d and -w to prevent normal merging
    # (it would be a waste of time, we'd repeat it multiple times).
    # $verbose is deliberately unquoted: it must vanish when empty.
    # shellcheck disable=SC2086
    "$BINDIR/htmerge" $verbose -s -d -w -m "$merge_conf" -a -c "$conf" >>"$REPORT"
    # And now remove the copy
    rm "$DB_BASE/$db/db.wordlist"
  done

  # Now after merging in all of those databases
  # we need to do the usual htmerge run
  # shellcheck disable=SC2086
  "$BINDIR/htmerge" -a $verbose -s -c "$conf" >>"$REPORT"

  gen_collect_note "Moving files $collect at:" $(date)

  # Publish the freshly built files.  db.wordlist(.work) is not backed up,
  # moved or chmod'ed here; only the three files below are.
  for file in db.docdb db.docs.index db.words.db; do
    # If you don't have the space for backups, set BACKUPS to something
    # other than "true".  Quoting keeps the test valid when BACKUPS is
    # unset, and a file that does not exist yet (first run) is skipped.
    if [ "$BACKUPS" = "true" ] && [ -e "$dest/$file" ]; then
      cp "$dest/$file" "$dest/$file.bak"
    fi
    # Move them because we don't want .work files around.
    # (Remember, we're generating using merging, so we want to make sure
    # we don't have old stuff to gum up the works...)
    mv "$dest/$file.work" "$dest/$file"
    # Make them world readable!
    chmod 644 "$dest/$file"
  done

  gen_collect_note "Done with $collect at:" $(date)
}

if [ -r "$COLLECT_LIST" ]; then
  # Collection names are whitespace separated, hence the unquoted expansion.
  for collect in $(cat "$COLLECT_LIST"); do
    gen_collect_build "$collect"
  done
else
  echo "gen-collect: cannot read collection list '$COLLECT_LIST'" >&2
fi
# That's it!