You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

299 lines
6.1 KiB

#!/usr/local/bin/perl
##
## changehost.pl (C) 1995 Andrew Scherpbier
##
## This program will change hostnames of URLs in the document database and index.
##
## usage:
## changehost.pl database_base from to
##
## example:
## changehost.pl /opt/www/htdig/sdsu www.sdsu.edu www.northpole.net
##
## Two new databases will be created with a base of '/tmp/new'.
## These databases can then be used by htsearch.
##
use GDBM_File;
##
## Command line: database_base from to
## Extra arguments are ignored, as before.  An empty or missing 'from' is
## rejected because the pattern would then match every http:// URL.
##
(@ARGV >= 3 && length($ARGV[1]))
    or die "usage: $0 database_base from to\n";

my ($base, $from, $to) = @ARGV[0 .. 2];

##
## \Q...\E keeps the dots (and any other regex metacharacters) of the host
## name literal, so "www.sdsu.edu" can no longer match "wwwXsdsu.edu".
## Matching stays case-insensitive and only the first occurrence in a string
## is replaced.
##
my $from_url = qr{http://\Q$from\E}i;
my $to_url   = "http://$to";

my $dbfile  = "$base.docdb";
my $newfile = "/tmp/new.docdb";
my (%newdb, %docdb);

##
## Convert the document database first.
## Both the key (the URL) and the URL field stored inside the record are
## rewritten; records whose key does not match are copied unchanged.
##
tie(%newdb, 'GDBM_File', $newfile, GDBM_NEWDB, 0644) or die "$newfile: '$!'";
tie(%docdb, 'GDBM_File', $dbfile, GDBM_READER, 0) or die "$dbfile: $!";

while (my ($key, $value) = each %docdb)
{
    if ($key =~ $from_url)
    {
        my %record = parse_ref_record($value);

        $key =~ s{$from_url}{$to_url};
        print "$key\n";

        # Only touch the URL field when the record actually has one, so
        # that no empty URL field is invented for it.
        if (exists $record{"URL"})
        {
            $record{"URL"} =~ s{$from_url}{$to_url};
        }
        $value = create_ref_record(%record);
    }
    $newdb{$key} = $value;
}

untie %newdb;
untie %docdb;

##
## Now create the document index
## Here the URL is looked for in the value; keys are copied as they are.
##
$newfile = "/tmp/new.docs.index";
$dbfile  = "$base.docs.index";

tie(%newdb, 'GDBM_File', $newfile, GDBM_NEWDB, 0644) or die "$newfile: '$!'";
tie(%docdb, 'GDBM_File', $dbfile, GDBM_READER, 0) or die "$dbfile: $!";

while (my ($key, $value) = each %docdb)
{
    # s/// is a no-op when the pattern does not match, so no separate test.
    $value =~ s{$from_url}{$to_url};
    $newdb{$key} = $value;
}

untie %newdb;
untie %docdb;
######################################################################
##
## _ref_record_layout
##   Returns the known record fields as [tag, name, kind] triples, in the
##   order create_ref_record writes them.
##     "int"    - tag byte followed by one native int
##     "string" - tag byte, native int length, then that many bytes
##     "list"   - tag byte, native int count, then count (length, bytes) pairs
##   This is a sub rather than a file-level variable because the main program
##   calls the record routines before execution would reach an initializer
##   placed down here.
##
sub _ref_record_layout
{
    return (
        [  0, "ID",           "int"    ],
        [  1, "TIME",         "int"    ],
        [  2, "ACCESSED",     "int"    ],
        [  3, "STATE",        "int"    ],
        [  4, "SIZE",         "int"    ],
        [  5, "LINKS",        "int"    ],
        [  6, "IMAGESIZE",    "int"    ],
        [  7, "HOPCOUNT",     "int"    ],
        [  8, "URL",          "string" ],
        [  9, "HEAD",         "string" ],
        [ 10, "TITLE",        "string" ],
        [ 11, "DESCRIPTIONS", "list"   ],
        [ 12, "ANCHORS",      "list"   ],
        [ 13, "EMAIL",        "string" ],
        [ 14, "NOTIFICATION", "string" ],
        [ 15, "SUBJECT",      "string" ],
    );
}

##
## _ref_list_separator
##   Separator placed between the entries of a "list" field while the record
##   is held in a hash.  It is never written to the database; it only has to
##   agree between parse_ref_record and create_ref_record.
##   NOTE(review): the previous code had an empty separator here, which split
##   lists into single characters.  "\001" is assumed not to occur inside an
##   entry -- confirm against real data.
##
sub _ref_list_separator
{
    return "\001";
}

##
## create_ref_record(%rec)
##   Serializes a record hash (as produced by parse_ref_record) back into the
##   packed string stored in the document database.  Only keys that exist in
##   %rec are written, in ascending tag order.  Returns the packed string
##   (empty when %rec has none of the known keys).
##
sub create_ref_record
{
    my (%rec) = @_;
    my $sep = _ref_list_separator();
    my $s = "";

    foreach my $field (_ref_record_layout())
    {
        my ($tag, $name, $kind) = @{$field};
        next unless exists $rec{$name};
        my $v = $rec{$name};

        if ($kind eq "int")
        {
            $s .= pack("Ci", $tag, $v);
        }
        elsif ($kind eq "string")
        {
            $s .= pack("Ci", $tag, length($v)) . $v;
        }
        else
        {
            # A limit of -1 keeps trailing empty entries.  The count written
            # is the real number of entries; the old "$#v - 1" was two short
            # and produced records that could not be parsed back.
            my @items = split(/\Q$sep\E/, $v, -1);
            $s .= pack("Ci", $tag, scalar(@items));
            foreach my $item (@items)
            {
                $s .= pack("i", length($item)) . $item;
            }
        }
    }
    return $s;
}

##
## parse_ref_record($value)
##   Decodes a packed document record into a hash keyed by field name (see
##   _ref_record_layout).  Integer fields become numbers, string fields are
##   returned byte for byte, and list fields are returned as one string with
##   the entries joined by _ref_list_separator().
##   Unknown tag bytes are skipped one byte at a time, as before.
##   Returns the hash as a flat list.
##
sub parse_ref_record
{
    my ($value) = @_;
    my %rec;
    return %rec unless defined $value;

    my $sep = _ref_list_separator();
    my $int_size = length(pack("i", 0));    # width of a native int
    my $end = length($value);
    my $pos = 0;

    my %field_for;
    foreach my $field (_ref_record_layout())
    {
        $field_for{$field->[0]} = $field;
    }

    # Reads one native int at the current position and steps past it.
    my $read_int = sub {
        my $n = unpack("i", substr($value, $pos, $int_size));
        $pos += $int_size;
        return $n;
    };

    # Reads a length-prefixed string and steps past it.  substr is used
    # instead of unpack("A...") because "A" strips trailing spaces and NULs,
    # which silently changed the stored data on a round trip.
    my $read_string = sub {
        my $len = $read_int->();
        $len = 0 unless defined $len && $len > 0;
        my $str = substr($value, $pos, $len);
        $pos += $len;
        return defined $str ? $str : "";
    };

    while ($pos < $end)
    {
        my $tag = unpack("C", substr($value, $pos, 1));
        $pos++;

        my $field = $field_for{$tag};
        next unless $field;

        my ($name, $kind) = @{$field}[1, 2];
        if ($kind eq "int")
        {
            $rec{$name} = $read_int->();
        }
        elsif ($kind eq "string")
        {
            $rec{$name} = $read_string->();
        }
        else
        {
            my $count = $read_int->();
            my @items;
            foreach (1 .. ($count || 0))
            {
                # Every entry needs at least its length prefix; stop early
                # if the record is truncated or the count is corrupt.
                last if $pos >= $end;
                push @items, $read_string->();
            }
            $rec{$name} = join($sep, @items);
        }
    }
    return %rec;
}