You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

184 lines
4.0 KiB

#!/usr/local/bin/perl
##
## doclist.pl (C) 1995 Andrew Scherpbier
##
## This program will list the information in the documentdb generated by htdig.
##
use GDBM_File;
# Path of the document database, taken from the first command-line argument.
my $dbfile = $ARGV[0];
die "Usage: $0 <documentdb file>\n"
    unless defined($dbfile) && length($dbfile);

# Open the database read-only through GDBM_File.
my %docdb;
tie(%docdb, 'GDBM_File', $dbfile, GDBM_READER(), 0)
    || die "Unable to open $dbfile: $!";

# Walk every entry and print a short summary of each decoded record.
while (my($doc_key, $doc_blob) = each %docdb)
{
    # Entries whose key starts with "nextDocID" are skipped; presumably this
    # is a counter kept by htdig rather than a document -- confirm.
    next if $doc_key =~ /^nextDocID/;

    my %record = parse_ref_record($doc_blob);

    print "Title: $record{'TITLE'}\n";
    print "Descriptions: $record{'DESCRIPTIONS'}\n";
    print "URL: $record{'URL'}\n";
    print "\n";
}

# Release the database handle explicitly once the listing is complete.
untie %docdb;
##
## parse_ref_record($value)
##
## Decode one serialized document record into a hash.
##
## The record is a sequence of fields.  Each field starts with a one-byte
## tag, followed by a payload whose layout depends on the tag:
##   int    - one native int (assumed to be 4 bytes, as in the original code)
##   string - an int length followed by that many bytes
##   list   - an int count followed by that many strings
##
## Returns the decoded fields as a flat key/value list (assign it to a hash).
## A tag that is not in the table is skipped by consuming only its tag byte.
## Truncated or corrupt payloads are decoded as far as possible instead of
## raising an error.
##
sub parse_ref_record
{
    my($value) = @_;
    my %rec;

    # Tag byte => [ record key, payload layout ].
    # Kept inside the sub because this definition sits after the main loop
    # in the file, so a file-level initialisation here would run too late.
    my %field = (
         0 => ["ID",           "int"],
         1 => ["TIME",         "int"],
         2 => ["ACCESSED",     "int"],
         3 => ["STATE",        "int"],
         4 => ["SIZE",         "int"],
         5 => ["LINKS",        "int"],
         6 => ["IMAGESIZE",    "int"],
         7 => ["HOPCOUNT",     "int"],
         8 => ["URL",          "string"],
         9 => ["HEAD",         "string"],
        10 => ["TITLE",        "string"],
        11 => ["DESCRIPTIONS", "list"],
        12 => ["ANCHORS",      "list"],
        13 => ["EMAIL",        "string"],
        14 => ["NOTIFICATION", "string"],
        15 => ["SUBJECT",      "string"],
        16 => ["STRING",       "string"],
        17 => ["METADSC",      "string"],
        18 => ["BACKLINKS",    "int"],
        19 => ["SIG",          "int"],
    );

    # Separator placed between the items of a list field.
    # NOTE(review): the original appended an empty string after each item and
    # then chop()ed the result, which removed the last character of the final
    # item.  The separator literal was presumably a non-printing character
    # lost from the source; "\001" is assumed here -- confirm.
    my $list_separator = "\001";

    $value = "" unless defined $value;

    while (length($value) > 0)
    {
        my $what = unpack("C", $value);
        $value = substr($value, 1);

        my $spec = $field{$what};
        next unless $spec;      # unknown tag: only the tag byte is consumed

        my($name, $kind) = @$spec;

        if ($kind eq "int")
        {
            $rec{$name} = _take_int(\$value);
        }
        elsif ($kind eq "string")
        {
            $rec{$name} = _take_string(\$value);
        }
        else
        {
            my $count = _take_int(\$value);
            $count = 0 unless defined $count;

            my @items;
            foreach (1 .. $count)
            {
                # Stop early on a truncated record instead of looping over a
                # possibly huge, corrupt count.
                last unless length($value) > 0;

                my $item = _take_string(\$value);
                push(@items, defined($item) ? $item : "");
            }
            $rec{$name} = join($list_separator, @items);
        }
    }
    return %rec;
}

# Remove one native int (assumed 4 bytes) from the front of the buffer and
# return it.  When fewer than 4 bytes remain the buffer is emptied and undef
# (empty list in list context) is returned.
sub _take_int
{
    my($buf_ref) = @_;

    if (length($$buf_ref) < 4)
    {
        $$buf_ref = "";
        return;
    }

    my $number = unpack("i", $$buf_ref);
    $$buf_ref = substr($$buf_ref, 4);
    return $number;
}

# Remove one length-prefixed string from the front of the buffer and return
# it.  The length is clamped to the bytes actually available, and a negative
# length is treated as zero, so a corrupt length cannot abort the program.
sub _take_string
{
    my($buf_ref) = @_;

    my $length = _take_int($buf_ref);
    return unless defined $length;

    my $available = length($$buf_ref);
    $length = 0          if $length < 0;
    $length = $available if $length > $available;

    # "A" strips trailing spaces and NULs, exactly as the original code did.
    my $text = unpack("A$length", $$buf_ref);
    $$buf_ref = substr($$buf_ref, $length);
    return $text;
}