#
# Part of the ht://Dig package
# Copyright (c) 1999-2004 The ht://Dig Group
# For copyright details, see the file COPYING in your distribution
# or the GNU Library General Public License (LGPL) version 2 or later
#
#
# $Id: t_htdig_local,v 1.10 2004/05/28 13:15:30 lha Exp $
#
# Runs htdig/htpurge seven times against the "set1" test documents, each
# time with a different combination of attributes, and compares the list of
# indexed URLs (and the database files created) with the expected result.
#
# Tests the following config attributes:
#	bad_extensions
#	bad_local_extensions
#	bad_querystr
#	check_unique_md5
#	content_classifier
#	database_dir
#	exclude_urls
#	limit_normalized
#	limit_urls_to
#	local_default_doc
#	local_extensions
#	local_urls
#	local_urls_only
#	local_user_urls
#	max_hop_count
#	md5_db
#	mime_types
#	remove_default_doc
#	server_aliases
#	start_url
#	valid_extensions

# Sourcing test_functions supplies set_attr, fail, $htdig, $htpurge and
# $testdir; the action variable asks it to start the test web server.
test_functions_action=--start-apache
. ./test_functions

# set up config file with chosen non-default values
config=$testdir/conf/htdig.conf.tmp
cp "$testdir/conf/htdig.conf2" "$config"

################################################################################
# test for local-file-system access to URLs
/bin/rm -f var/htdig2/*

set_attr start_url "http://localhost:7400/set1/ http://localhost:7400/set1/title.html?site3.html http://localhost:7400/set1/title.html?site4.html"

# ban  ite3.htm  from query, but not from main URL.
# Allow  site3.html,  but not  title.html?site3.html
set_attr bad_querystr ite3.htm

# Only bad_local.htm should be reported as having a bad local extension.
expected='bad_local.htm'
got=`$htdig "$@" -t -i -vv -c "$config" | grep "Bad local extension:" | sed -e"s-.*/--"`
if [ "$expected" != "$got" ]
then
    fail "first htdig: expected
$expected
but got
$got"
fi

# ls writes one name per line when its output is not a terminal, so the
# expected listing is newline-separated as well.
expected='db.docdb
db.docs
db.docs.index
db.excerpts
db.worddump
db.words.db
db.words.db_weakcmpr'
got=`/bin/ls var/htdig2`
if [ "$expected" != "$got" ]
then
    fail "created files: expected
$expected
but got
$got"
fi

$htpurge -c "$config" || fail "couldn't purge first time"

# should  http://localhost:7400/set1/sub%2520dir  be purged?
expected='http://localhost:7400/set1/
http://localhost:7400/set1/bad_local.htm
http://localhost:7400/set1/script.html
http://localhost:7400/set1/site%201.html
http://localhost:7400/set1/site2.html
http://localhost:7400/set1/site3.html
http://localhost:7400/set1/site4.html
http://localhost:7400/set1/sub%2520dir/
http://localhost:7400/set1/sub%2520dir/empty%20file.html
http://localhost:7400/set1/title.html
http://localhost:7400/set1/title.html?site4.html'
got=`./document -c "$config" -u | sort`
if [ "$expected" != "$got" ]
then
    fail "first document: expected
$expected
but got
$got"
fi

# Clear the query-string restriction again.  The attribute name must match
# the one set above (bad_querystr), otherwise the restriction stays active.
set_attr bad_querystr

################################################################################
# limit_urls_to  applies before server alias expansion
set_attr start_url	http://myhost/set1/index.html
set_attr limit_urls_to	"http://myhost/set1/"
set_attr server_aliases	myhost=localhost:7400

$htdig "$@" -t -i -c "$config" || fail "couldn't dig second time"
$htpurge -c "$config" || fail "couldn't purge second time"

# only start_url uses alias, so only it passes the limit_urls_to test
expected='http://localhost:7400/set1/'
got=`./document -c "$config" -u | sort`
if [ "$expected" != "$got" ]
then
    fail "second document: expected
$expected
but got
$got"
fi

################################################################################
# Check remote URLs not retrieved if local_urls_only specified
set_attr local_urls_only true
set_attr remove_default_doc site2.html
# Note: local_urls_only doesn't handle directories without a default doc
set_attr local_default_doc "site2.html empty%20file.html"

set_attr start_url	http://myhost/set1/index.html
# don't care what the aliased URL is; only check the normalized one
set_attr limit_urls_to
set_attr limit_normalized	"http://localhost:7400/set1/"
set_attr server_aliases	myhost=localhost:7400

$htdig "$@" -t -i -c "$config" || fail "couldn't dig third time"
$htpurge -c "$config" || fail "couldn't purge third time"

expected='http://localhost:7400/set1/
http://localhost:7400/set1/index.html
http://localhost:7400/set1/script.html
http://localhost:7400/set1/site%201.html
http://localhost:7400/set1/site3.html
http://localhost:7400/set1/site4.html
http://localhost:7400/set1/sub%2520dir/
http://localhost:7400/set1/title.html'
got=`./document -c "$config" -u | sort`
if [ "$expected" != "$got" ]
then
    fail "third document: expected
$expected
but got
$got"
fi

# Undo the attributes that were specific to the third run.
set_attr remove_default_doc index.html
set_attr local_urls_only false
set_attr limit_normalized

################################################################################
# test for  file:///  URLs
expected=''		# no "bad local" extensions for file:///

# Check only one "title.html" found...
set_attr check_unique_md5 true
set_attr start_url "http://localhost:7400/set1/title.html file://$PWD/htdocs/set1/"
# Single quotes: the literal text ${start_url} is written to the config file.
set_attr limit_urls_to '${start_url}'

got=`$htdig "$@" -t -i -vv -c "$config" | grep "Bad local extension:" | sed -e"s-.*/--"`
if [ "$expected" != "$got" ]
then
    fail "fourth htdig: expected
$expected
but got
$got"
fi

# check_unique_md5 adds db.md5hash.db to the set of database files.
expected='db.docdb
db.docs
db.docs.index
db.excerpts
db.md5hash.db
db.worddump
db.words.db
db.words.db_weakcmpr'
got=`/bin/ls var/htdig2`
if [ "$expected" != "$got" ]
then
    fail "fourth created files: expected
$expected
but got
$got"
fi

$htpurge -c "$config" || fail "couldn't purge fourth time"

# The first sed strips the local document root, leaving file:///set1/...;
# the last sed reduces whichever title.html URL was kept to "/title.html",
# so the comparison does not depend on which of the two copies was indexed.
expected='file:///set1/bad_local.htm
file:///set1/index.html
file:///set1/script.html
file:///set1/site%201.html
file:///set1/site2.html
file:///set1/site3.html
file:///set1/site4.html
file:///set1/sub%2520dir/empty%20file.html
/title.html'
got=`./document -c "$config" -u | sed "s#${PWD}/htdocs##" | sort | sed "s#.*/title.html#/title.html#"`
if [ "$expected" != "$got" ]
then
    fail "fourth document: expected
$expected
but got
$got"
fi

################################################################################
# test mime types handling
expected=''		# no "MIME type:" lines expected in the verbose output

set_attr max_hop_count 1		# removes "empty%20file.html"
set_attr exclude_urls "site4.html script.html site[3].html"
set_attr bad_extensions .foo
set_attr local_urls_only false
rm -f var/htdig2/db.md5hash.db
# Single quotes: the literal text ${database_base} is written to the config.
set_attr md5_db '${database_base}.md5.db'
set_attr mime_types $PWD/mime-without-htm
set_attr content_classifier $PWD/say-text

# Helper files in the current directory: a MIME table that only lists the
# "html" extension, and a classifier script that always prints text/plain.
echo 'text/html html' > mime-without-htm
printf '%s\n' '#!/bin/sh' 'echo text/plain' > say-text
chmod 700 say-text

got=`$htdig "$@" -t -i -vv -c "$config" | grep "MIME type:" | sed -e"s-.*/--"`
if [ "$expected" != "$got" ]
then
    fail "fifth htdig: expected
$expected
but got
$got"
fi

# md5_db now names the hash database db.md5.db instead of db.md5hash.db.
expected='db.docdb
db.docs
db.docs.index
db.excerpts
db.md5.db
db.worddump
db.words.db
db.words.db_weakcmpr'
got=`/bin/ls var/htdig2`
if [ "$expected" != "$got" ]
then
    fail "fifth created files: expected
$expected
but got
$got"
fi

$htpurge -c "$config" || fail "couldn't purge fifth time"

expected='file:///set1/bad_local.htm
file:///set1/index.html
file:///set1/nph-location.cgi
file:///set1/site%201.html
file:///set1/site2.html
file:///set1/site3.html
file:///set1/title.html'
got=`./document -c "$config" -u | sed "s#${PWD}/htdocs##" | sort`
if [ "$expected" != "$got" ]
then
    fail "fifth document: expected
$expected
but got
$got"
fi

################################################################################
# same MIME set-up, but selecting documents with valid_extensions
expected=''		# no "MIME type:" lines expected in the verbose output

set_attr max_hop_count		# clear the limit; "empty%20file.html" is expected again
set_attr exclude_urls /CVS/
set_attr valid_extensions ".foo .html"
set_attr bad_extensions
set_attr mime_types $PWD/mime-without-htm
set_attr content_classifier $PWD/say-text

echo 'text/html html' > mime-without-htm
printf '%s\n' '#!/bin/sh' 'echo text/plain' > say-text
chmod 700 say-text

got=`$htdig "$@" -t -i -vv -c "$config" | grep "MIME type:" | sed -e"s-.*/--"`
if [ "$expected" != "$got" ]
then
    fail "sixth htdig: expected
$expected
but got
$got"
fi

$htpurge -c "$config" || fail "couldn't purge sixth time"

expected='file:///set1/index.html
file:///set1/nph-location.foo
file:///set1/script.html
file:///set1/site%201.html
file:///set1/site2.html
file:///set1/site3.html
file:///set1/site4.html
file:///set1/sub%2520dir/empty%20file.html
file:///set1/title.html'
got=`./document -c "$config" -u | sed "s#${PWD}/htdocs##" | sort`
if [ "$expected" != "$got" ]
then
    fail "sixth document: expected
$expected
but got
$got"
fi

################################################################################
# local_urls / local_user_urls: map a host that is never contacted
# (http://somewhere/) onto the local htdocs tree
set_attr local_urls_only
set_attr local_urls "http://somewhere/=$PWD/htdocs/"
set_attr local_user_urls "http://somewhere/=$PWD/,/set1/"
set_attr start_url "http://somewhere/~htdocs/"
set_attr valid_extensions
set_attr local_default_doc index.html
set_attr remove_default_doc index.html

$htdig "$@" -t -i -c "$config" || fail "couldn't dig seventh time"
$htpurge -c "$config" || fail "couldn't purge seventh time"

# local_urls_only can't handle  .../~htdocs/sub%2520dir/empty%20file.html
expected='http://somewhere/~htdocs/
http://somewhere/~htdocs/script.html
http://somewhere/~htdocs/site%201.html
http://somewhere/~htdocs/site2.html
http://somewhere/~htdocs/site3.html
http://somewhere/~htdocs/site4.html
http://somewhere/~htdocs/title.html'
got=`./document -c "$config" -u | sort`
if [ "$expected" != "$got" ]
then
    fail "seventh document: expected
$expected
but got
$got"
fi

# Remove the helper files created for the MIME tests.
/bin/rm mime-without-htm say-text

# Sourcing test_functions again with this action stops the test web server.
test_functions_action=--stop-apache
. ./test_functions