You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
360 lines
9.0 KiB
360 lines
9.0 KiB
#
|
|
# Part of the ht://Dig package <http://www.htdig.org/>
|
|
# Copyright (c) 1999-2004 The ht://Dig Group
|
|
# For copyright details, see the file COPYING in your distribution
|
|
# or the GNU Library General Public License (LGPL) version 2 or later
|
|
# <http://www.gnu.org/copyleft/lgpl.html>
|
|
#
|
|
# $Id: t_htdig_local,v 1.10 2004/05/28 13:15:30 lha Exp $
|
|
#
|
|
|
|
# Tests the following config attributes:
|
|
# bad_local_extensions
|
|
# check_unique_md5
|
|
# content_classifier
|
|
# database_dir
|
|
# exclude_urls
|
|
# limit_normalized
|
|
# limit_urls_to
|
|
# local_extensions
|
|
# local_urls
|
|
# local_urls_only
|
|
# local_user_urls
|
|
# max_hop_count
|
|
# md5_db
|
|
# mime_types
|
|
# remove_default_doc
|
|
# server_aliases
|
|
# start_url
|
|
|
|
# Source the shared test helpers with the "start Apache" action selected.
# (test_functions_action is presumably read by test_functions while it is
# being sourced -- confirm against that file.)
test_functions_action=--start-apache
. ./test_functions

# Work on a scratch copy of htdig.conf2; the tests below give its attributes
# chosen non-default values.
config=${testdir}/conf/htdig.conf.tmp
cp ${testdir}/conf/htdig.conf2 ${config}
|
|
|
|
################################################################################
# Test 1: local-file-system access to <http://...> URLs
#
# Checks, in order:
#   - which file htdig reports under "Bad local extension:",
#   - which database files the dig creates in var/htdig2,
#   - which URLs remain in the document database after htpurge.

/bin/rm -f var/htdig2/*
set_attr start_url "http://localhost:7400/set1/ http://localhost:7400/set1/title.html?site3.html http://localhost:7400/set1/title.html?site4.html"
# Ban "ite3.htm" in the query string only, not in the main URL:
# site3.html stays allowed, title.html?site3.html does not.
set_attr bad_querystr ite3.htm

# Keep only the file name (everything after the last "/") of each
# "Bad local extension:" line printed by the verbose dig.
expected='bad_local.htm'
got=`$htdig "$@" -t -i -vv -c $config | grep "Bad local extension:" | sed -e"s-.*/--"`
if [ "$expected" != "$got" ]
then
fail "first htdig: expected
$expected
but got
$got"
fi

expected='db.docdb
db.docs
db.docs.index
db.excerpts
db.worddump
db.words.db
db.words.db_weakcmpr'
got=`/bin/ls var/htdig2`
if [ "$expected" != "$got" ]
then
fail "created files: expected
$expected
but got
$got"
fi

# Report a failed purge explicitly, as every later section of this script does.
$htpurge -c $config || fail "couldn't purge first time"

# should http://localhost:7400/set1/sub%2520dir be purged?
expected='http://localhost:7400/set1/
http://localhost:7400/set1/bad_local.htm
http://localhost:7400/set1/script.html
http://localhost:7400/set1/site%201.html
http://localhost:7400/set1/site2.html
http://localhost:7400/set1/site3.html
http://localhost:7400/set1/site4.html
http://localhost:7400/set1/sub%2520dir/
http://localhost:7400/set1/sub%2520dir/empty%20file.html
http://localhost:7400/set1/title.html
http://localhost:7400/set1/title.html?site4.html'

got=`./document -c $config -u | sort`

if [ "$expected" != "$got" ]
then
fail "first document: expected
$expected
but got
$got"
fi

# Clear the query-string ban set above so it cannot leak into later tests.
# The attribute name must match the one set earlier ("bad_querystr"); the
# script previously reset the misspelled "bad_query_str", which left the ban
# in place.  (set_attr with no value presumably resets the attribute -- it is
# used that way throughout this script; confirm in test_functions.)
set_attr bad_querystr
|
|
|
|
|
|
################################################################################
# Test 2: limit_urls_to applies before server alias expansion
#
# start_url and limit_urls_to both name the host "myhost", which
# server_aliases maps to localhost:7400.
set_attr start_url http://myhost/set1/index.html
set_attr limit_urls_to "http://myhost/set1/"
set_attr server_aliases myhost=localhost:7400
${htdig} "$@" -t -i -c ${config} || fail "couldn't dig second time"
${htpurge} -c ${config} || fail "couldn't purge second time"

# only start_url uses alias, so only it passes the limit_urls_to test
expected='http://localhost:7400/set1/'

got=`./document -c ${config} -u | sort`

test "$expected" = "$got" || fail "second document: expected
$expected
but got
$got"
|
|
|
|
|
|
|
|
################################################################################
# Test 3: check remote URLs not retrieved if local_urls_only specified
set_attr local_urls_only true
set_attr remove_default_doc site2.html
# Note: local_urls_only doesn't handle directories without a default doc
set_attr local_default_doc "site2.html empty%20file.html"
set_attr start_url http://myhost/set1/index.html
# don't care what the aliased URL is; only check the normalized one
set_attr limit_urls_to
set_attr limit_normalized "http://localhost:7400/set1/"
set_attr server_aliases myhost=localhost:7400
${htdig} "$@" -t -i -c ${config} || fail "couldn't dig third time"
${htpurge} -c ${config} || fail "couldn't purge third time"

# Sorted URL list the document database is expected to hold after the purge.
expected='http://localhost:7400/set1/
http://localhost:7400/set1/index.html
http://localhost:7400/set1/script.html
http://localhost:7400/set1/site%201.html
http://localhost:7400/set1/site3.html
http://localhost:7400/set1/site4.html
http://localhost:7400/set1/sub%2520dir/
http://localhost:7400/set1/title.html'

got=`./document -c ${config} -u | sort`

test "$expected" = "$got" || fail "third document: expected
$expected
but got
$got"

# Put back the attributes this test changed, ready for the next section.
set_attr remove_default_doc index.html
set_attr local_urls_only false
set_attr limit_normalized
|
|
|
|
|
|
################################################################################
# Test 4: <file:///...> URLs

# no "bad local" extensions for file:///
expected=''
# Check only one "title.html" found...
set_attr check_unique_md5 true
set_attr start_url "http://localhost:7400/set1/title.html file://${PWD}/htdocs/set1/"
set_attr limit_urls_to '${start_url}'
got=`${htdig} "$@" -t -i -vv -c ${config} | grep "Bad local extension:" | sed -e"s-.*/--"`
test "$expected" = "$got" || fail "fourth htdig: expected
$expected
but got
$got"

# With check_unique_md5 enabled the listing also contains db.md5hash.db.
expected='db.docdb
db.docs
db.docs.index
db.excerpts
db.md5hash.db
db.worddump
db.words.db
db.words.db_weakcmpr'
got=`/bin/ls var/htdig2`
test "$expected" = "$got" || fail "fourth created files: expected
$expected
but got
$got"

${htpurge} -c ${config} || fail "couldn't purge fourth time"

# title.html is started from both an http:// and a file:// URL; whichever
# copy survives, the second sed below collapses its URL to "/title.html".
expected='file:///set1/bad_local.htm
file:///set1/index.html
file:///set1/script.html
file:///set1/site%201.html
file:///set1/site2.html
file:///set1/site3.html
file:///set1/site4.html
file:///set1/sub%2520dir/empty%20file.html
/title.html'

# The first sed strips the local "$PWD/htdocs" prefix so the result does not
# depend on where the test tree lives.
got=`./document -c ${config} -u | sed "s#${PWD}/htdocs##" | sort | sed "s#.*/title.html#/title.html#"`

test "$expected" = "$got" || fail "fourth document: expected
$expected
but got
$got"
|
|
|
|
|
|
################################################################################
# Test 5: mime types handling

# The verbose dig below is expected to print no "MIME type:" lines at all.
expected=''
set_attr max_hop_count 1	# removes "empty%20file.html"
set_attr exclude_urls "site4.html script.html site[3].html"
set_attr bad_extensions .foo
set_attr local_urls_only false

# Drop the MD5 database left in var/htdig2 and point md5_db at a new name.
rm -f var/htdig2/db.md5hash.db
set_attr md5_db '${database_base}.md5.db'

# mime_types names a table that maps only "html" to text/html, and
# content_classifier names a script that always prints text/plain.
# Both files are created right here in the current directory.
set_attr mime_types ${PWD}/mime-without-htm
set_attr content_classifier ${PWD}/say-text
echo 'text/html	html' > mime-without-htm
echo '#!/bin/sh
echo text/plain' > say-text
chmod 700 say-text
got=`${htdig} "$@" -t -i -vv -c ${config} | grep "MIME type:" | sed -e"s-.*/--"`
test "$expected" = "$got" || fail "fifth htdig: expected
$expected
but got
$got"

# The MD5 database must now appear under its configured name, db.md5.db.
expected='db.docdb
db.docs
db.docs.index
db.excerpts
db.md5.db
db.worddump
db.words.db
db.words.db_weakcmpr'
got=`/bin/ls var/htdig2`
test "$expected" = "$got" || fail "fifth created files: expected
$expected
but got
$got"

${htpurge} -c ${config} || fail "couldn't purge fifth time"

expected='file:///set1/bad_local.htm
file:///set1/index.html
file:///set1/nph-location.cgi
file:///set1/site%201.html
file:///set1/site2.html
file:///set1/site3.html
file:///set1/title.html'

# Strip the local "$PWD/htdocs" prefix before comparing.
got=`./document -c ${config} -u | sed "s#${PWD}/htdocs##" | sort`

test "$expected" = "$got" || fail "fifth document: expected
$expected
but got
$got"
|
|
|
|
################################################################################
# Test 6: valid_extensions restricted to ".foo .html"

# The verbose dig below is expected to print no "MIME type:" lines at all.
expected=''
# No value given: max_hop_count is no longer 1, and "empty%20file.html" is
# expected in the final URL list again.
set_attr max_hop_count
set_attr exclude_urls /CVS/
set_attr valid_extensions ".foo .html"
set_attr bad_extensions

# Same MIME table and classifier script as in the mime types test; they are
# written again here.
set_attr mime_types ${PWD}/mime-without-htm
set_attr content_classifier ${PWD}/say-text
echo 'text/html	html' > mime-without-htm
echo '#!/bin/sh
echo text/plain' > say-text
chmod 700 say-text
got=`${htdig} "$@" -t -i -vv -c ${config} | grep "MIME type:" | sed -e"s-.*/--"`
test "$expected" = "$got" || fail "sixth htdig: expected
$expected
but got
$got"

${htpurge} -c ${config} || fail "couldn't purge sixth time"

expected='file:///set1/index.html
file:///set1/nph-location.foo
file:///set1/script.html
file:///set1/site%201.html
file:///set1/site2.html
file:///set1/site3.html
file:///set1/site4.html
file:///set1/sub%2520dir/empty%20file.html
file:///set1/title.html'

# Strip the local "$PWD/htdocs" prefix before comparing.
got=`./document -c ${config} -u | sed "s#${PWD}/htdocs##" | sort`

test "$expected" = "$got" || fail "sixth document: expected
$expected
but got
$got"
|
|
|
|
|
|
################################################################################
# Test 7: local_urls and local_user_urls with a "~user" style start URL
set_attr local_urls_only
set_attr local_urls "http://somewhere/=${PWD}/htdocs/"
set_attr local_user_urls "http://somewhere/=${PWD}/,/set1/"
set_attr start_url "http://somewhere/~htdocs/"

set_attr valid_extensions
set_attr local_default_doc index.html
set_attr remove_default_doc index.html

${htdig} "$@" -t -i -c ${config} || fail "couldn't dig seventh time"
${htpurge} -c ${config} || fail "couldn't purge seventh time"

#local_urls_only can't handle .../~htdocs/sub%2520dir/empty%20file.html
expected='http://somewhere/~htdocs/
http://somewhere/~htdocs/script.html
http://somewhere/~htdocs/site%201.html
http://somewhere/~htdocs/site2.html
http://somewhere/~htdocs/site3.html
http://somewhere/~htdocs/site4.html
http://somewhere/~htdocs/title.html'

got=`./document -c ${config} -u | sort`

test "$expected" = "$got" || fail "seventh document: expected
$expected
but got
$got"
|
|
|
|
|
|
# Remove the MIME table and classifier script written by the tests above.
/bin/rm mime-without-htm
/bin/rm say-text

# Source the shared helpers again, this time with the "stop Apache" action.
test_functions_action=--stop-apache
. ./test_functions
|