# t_htdig_local: htdig tests for local-file-system and file:/// retrieval

#
# Part of the ht://Dig package <http://www.htdig.org/>
# Copyright (c) 1999-2004 The ht://Dig Group
# For copyright details, see the file COPYING in your distribution
# or the GNU Library General Public License (LGPL) version 2 or later
# <http://www.gnu.org/copyleft/lgpl.html>
#
# $Id: t_htdig_local,v 1.10 2004/05/28 13:15:30 lha Exp $
#
# Tests the following config attributes:
# bad_local_extensions
# check_unique_md5
# content_classifier
# database_dir
# exclude_urls
# limit_normalized
# limit_urls_to
# local_extensions
# local_urls
# local_urls_only
# local_user_urls
# max_hop_count
# md5_db
# mime_types
# remove_default_doc
# server_aliases
# start_url
# Source the shared helpers with the --start-apache action. The rest of this
# script uses names that are not defined in this file (testdir, htdig, htpurge,
# set_attr, fail) -- presumably provided by test_functions; confirm there.
test_functions_action=--start-apache
. ./test_functions
# Work on a scratch copy of htdig.conf2 so that each test below can override
# attributes with set_attr. Abort early if the copy cannot be made, since
# every later step reads $config.
config="$testdir/conf/htdig.conf.tmp"
cp "$testdir/conf/htdig.conf2" "$config" || fail "couldn't create $config"
################################################################################
# Test 1: local-file-system access to <http://...> URLs
/bin/rm -f var/htdig2/*
set_attr start_url "http://localhost:7400/set1/ http://localhost:7400/set1/title.html?site3.html http://localhost:7400/set1/title.html?site4.html"
# Ban "ite3.htm" in the query string only, not in the main URL:
# site3.html stays allowed, title.html?site3.html does not.
set_attr bad_querystr ite3.htm
# Collect the "Bad local extension:" lines of the verbose dig; the sed strips
# everything up to the last "/" so only the file name is compared.
expected='bad_local.htm'
got=$($htdig "$@" -t -i -vv -c "$config" | grep "Bad local extension:" | sed -e"s-.*/--")
if [ "$expected" != "$got" ]
then
  fail "first htdig: expected
$expected
but got
$got"
fi
# The dig must leave exactly these files in var/htdig2.
expected='db.docdb
db.docs
db.docs.index
db.excerpts
db.worddump
db.words.db
db.words.db_weakcmpr'
got=$(/bin/ls var/htdig2)
if [ "$expected" != "$got" ]
then
  fail "created files: expected
$expected
but got
$got"
fi
# Check the purge like every later test does, instead of ignoring a failure.
$htpurge -c "$config" || fail "couldn't purge first time"
# should http://localhost:7400/set1/sub%2520dir be purged?
expected='http://localhost:7400/set1/
http://localhost:7400/set1/bad_local.htm
http://localhost:7400/set1/script.html
http://localhost:7400/set1/site%201.html
http://localhost:7400/set1/site2.html
http://localhost:7400/set1/site3.html
http://localhost:7400/set1/site4.html
http://localhost:7400/set1/sub%2520dir/
http://localhost:7400/set1/sub%2520dir/empty%20file.html
http://localhost:7400/set1/title.html
http://localhost:7400/set1/title.html?site4.html'
got=$(./document -c "$config" -u | sort)
if [ "$expected" != "$got" ]
then
  fail "first document: expected
$expected
but got
$got"
fi
# Reset the same attribute that was set above (bad_querystr) so the
# query-string ban does not leak into the following tests.
set_attr bad_querystr
################################################################################
# Test 2: limit_urls_to is applied before server_aliases are expanded
set_attr start_url http://myhost/set1/index.html
set_attr limit_urls_to "http://myhost/set1/"
set_attr server_aliases myhost=localhost:7400
$htdig "$@" -t -i -c $config \
  || fail "couldn't dig second time"
$htpurge -c $config \
  || fail "couldn't purge second time"
# Only start_url is written with the alias, so it is the only URL that
# passes the limit_urls_to test.
expected='http://localhost:7400/set1/'
got=$(./document -c $config -u | sort)
[ "$expected" = "$got" ] || fail "second document: expected
$expected
but got
$got"
################################################################################
# Test 3: with local_urls_only specified, remote URLs must not be retrieved
set_attr local_urls_only true
set_attr remove_default_doc site2.html
# Note: local_urls_only doesn't handle directories without a default doc
set_attr local_default_doc "site2.html empty%20file.html"
set_attr start_url http://myhost/set1/index.html
# The aliased form of the URL does not matter here: clear limit_urls_to and
# restrict on the normalized URL only.
set_attr limit_urls_to
set_attr limit_normalized "http://localhost:7400/set1/"
set_attr server_aliases myhost=localhost:7400
$htdig "$@" -t -i -c $config \
  || fail "couldn't dig third time"
$htpurge -c $config \
  || fail "couldn't purge third time"
expected='http://localhost:7400/set1/
http://localhost:7400/set1/index.html
http://localhost:7400/set1/script.html
http://localhost:7400/set1/site%201.html
http://localhost:7400/set1/site3.html
http://localhost:7400/set1/site4.html
http://localhost:7400/set1/sub%2520dir/
http://localhost:7400/set1/title.html'
got=$(./document -c $config -u | sort)
[ "$expected" = "$got" ] || fail "third document: expected
$expected
but got
$got"
# Put back the attributes this test changed before the next test runs.
set_attr remove_default_doc index.html
set_attr local_urls_only false
set_attr limit_normalized
################################################################################
# Test 4: <file:///...> URLs
expected="" # file:/// retrieval should report no "bad local" extensions
# With check_unique_md5 on, only one "title.html" should be found even though
# it is listed via http and is also inside the file:// directory.
set_attr check_unique_md5 true
set_attr start_url "http://localhost:7400/set1/title.html file://$PWD/htdocs/set1/"
set_attr limit_urls_to '${start_url}'
got=$($htdig "$@" -t -i -vv -c $config | grep "Bad local extension:" | sed -e"s-.*/--")
[ "$expected" = "$got" ] || fail "fourth htdig: expected
$expected
but got
$got"
# Same database files as a plain dig, plus the md5 hash database.
expected='db.docdb
db.docs
db.docs.index
db.excerpts
db.md5hash.db
db.worddump
db.words.db
db.words.db_weakcmpr'
got=$(/bin/ls var/htdig2)
[ "$expected" = "$got" ] || fail "fourth created files: expected
$expected
but got
$got"
$htpurge -c $config \
  || fail "couldn't purge fourth time"
expected='file:///set1/bad_local.htm
file:///set1/index.html
file:///set1/script.html
file:///set1/site%201.html
file:///set1/site2.html
file:///set1/site3.html
file:///set1/site4.html
file:///set1/sub%2520dir/empty%20file.html
/title.html'
# The first sed drops "$PWD/htdocs" so the file URLs do not depend on the
# build directory; the last sed (after sorting) cuts the title.html URL down
# to "/title.html" so the comparison does not depend on its prefix.
got=$(./document -c $config -u \
  | sed "s#${PWD}/htdocs##" \
  | sort \
  | sed "s#.*/title.html#/title.html#")
[ "$expected" = "$got" ] || fail "fourth document: expected
$expected
but got
$got"
################################################################################
# Test 5: mime types handling (mime_types file plus content_classifier program)
expected='' # the verbose dig must print no "MIME type:" lines
set_attr max_hop_count 1 # removes "empty%20file.html"
set_attr exclude_urls "site4.html script.html site[3].html"
set_attr bad_extensions .foo
set_attr local_urls_only false
# Drop the hash database left by the previous test; this test stores its
# hashes under a different name via md5_db.
rm -f var/htdig2/db.md5hash.db
set_attr md5_db '${database_base}.md5.db'
# Quote the paths so a build directory containing spaces or glob characters
# is still passed to set_attr as a single value.
set_attr mime_types "$PWD/mime-without-htm"
set_attr content_classifier "$PWD/say-text"
# Fixture 1: a mime.types file that maps only the "html" extension.
printf '%s\n' 'text/html html' > mime-without-htm
# Fixture 2: a classifier script that always answers "text/plain".
printf '%s\n' '#!/bin/sh' 'echo text/plain' > say-text
chmod 700 say-text
got=$($htdig "$@" -t -i -vv -c "$config" | grep "MIME type:" | sed -e"s-.*/--")
if [ "$expected" != "$got" ]
then
  fail "fifth htdig: expected
$expected
but got
$got"
fi
# The hash database must now appear under the name chosen with md5_db.
expected='db.docdb
db.docs
db.docs.index
db.excerpts
db.md5.db
db.worddump
db.words.db
db.words.db_weakcmpr'
got=$(/bin/ls var/htdig2)
if [ "$expected" != "$got" ]
then
  fail "fifth created files: expected
$expected
but got
$got"
fi
$htpurge -c "$config" || fail "couldn't purge fifth time"
expected='file:///set1/bad_local.htm
file:///set1/index.html
file:///set1/nph-location.cgi
file:///set1/site%201.html
file:///set1/site2.html
file:///set1/site3.html
file:///set1/title.html'
# Strip "$PWD/htdocs" so the URLs do not depend on the build directory.
got=$(./document -c "$config" -u | sed "s#${PWD}/htdocs##" | sort)
if [ "$expected" != "$got" ]
then
  fail "fifth document: expected
$expected
but got
$got"
fi
################################################################################
# Test 6: valid_extensions (".foo .html") with bad_extensions cleared
expected='' # the verbose dig must print no "MIME type:" lines
set_attr max_hop_count # reset, so "empty%20file.html" is reachable again
set_attr exclude_urls /CVS/
set_attr valid_extensions ".foo .html"
set_attr bad_extensions
# Quote the paths so a build directory containing spaces or glob characters
# is still passed to set_attr as a single value.
set_attr mime_types "$PWD/mime-without-htm"
set_attr content_classifier "$PWD/say-text"
# Re-create both fixtures so this section does not depend on an earlier one:
# a mime.types file that maps only "html", and a classifier script that
# always answers "text/plain".
printf '%s\n' 'text/html html' > mime-without-htm
printf '%s\n' '#!/bin/sh' 'echo text/plain' > say-text
chmod 700 say-text
got=$($htdig "$@" -t -i -vv -c "$config" | grep "MIME type:" | sed -e"s-.*/--")
if [ "$expected" != "$got" ]
then
  fail "sixth htdig: expected
$expected
but got
$got"
fi
$htpurge -c "$config" || fail "couldn't purge sixth time"
expected='file:///set1/index.html
file:///set1/nph-location.foo
file:///set1/script.html
file:///set1/site%201.html
file:///set1/site2.html
file:///set1/site3.html
file:///set1/site4.html
file:///set1/sub%2520dir/empty%20file.html
file:///set1/title.html'
# Strip "$PWD/htdocs" so the URLs do not depend on the build directory.
got=$(./document -c "$config" -u | sed "s#${PWD}/htdocs##" | sort)
if [ "$expected" != "$got" ]
then
  fail "sixth document: expected
$expected
but got
$got"
fi
################################################################################
# Test 7: local_urls and local_user_urls mappings for http://somewhere/
set_attr local_urls_only
set_attr local_urls "http://somewhere/=$PWD/htdocs/"
set_attr local_user_urls "http://somewhere/=$PWD/,/set1/"
set_attr start_url "http://somewhere/~htdocs/"
set_attr valid_extensions
set_attr local_default_doc index.html
set_attr remove_default_doc index.html
$htdig "$@" -t -i -c $config \
  || fail "couldn't dig seventh time"
$htpurge -c $config \
  || fail "couldn't purge seventh time"
# local_urls_only can't handle .../~htdocs/sub%2520dir/empty%20file.html,
# so that document is absent from the expected list.
expected='http://somewhere/~htdocs/
http://somewhere/~htdocs/script.html
http://somewhere/~htdocs/site%201.html
http://somewhere/~htdocs/site2.html
http://somewhere/~htdocs/site3.html
http://somewhere/~htdocs/site4.html
http://somewhere/~htdocs/title.html'
got=$(./document -c $config -u | sort)
[ "$expected" = "$got" ] || fail "seventh document: expected
$expected
but got
$got"
# Teardown: delete the two fixture files written by the MIME tests, then
# source the helpers again with the --stop-apache action.
/bin/rm mime-without-htm say-text
test_functions_action=--stop-apache
. ./test_functions