You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
677 lines
18 KiB
Perl
677 lines
18 KiB
Perl
3 years ago
|
#!/usr/bin/perl
|
||
|
use strict;
|
||
|
#
|
||
|
# Version 3.0.1 19-September-2002
|
||
|
#
|
||
|
# External converter for htdig 3.1.4 or later (Perl5 or later)
|
||
|
# Usage: (in htdig.conf)
|
||
|
#
|
||
|
#external_parsers: application/rtf->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
|
||
|
# text/rtf->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
|
||
|
# application/pdf->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
|
||
|
# application/postscript->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
|
||
|
# application/msword->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
|
||
|
# application/wordperfect5.1->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
|
||
|
# application/wordperfect6.0->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
|
||
|
# application/msexcel->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
|
||
|
# application/vnd.ms-excel->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
|
||
|
# application/vnd.ms-powerpoint->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
|
||
|
# application/x-shockwave-flash->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
|
||
|
# application/x-shockwave-flash2-preview->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl
|
||
|
#
|
||
|
# Uses wp2html to convert Word and WordPerfect documents into HTML, and
|
||
|
# falls back to using Catdoc for Word and Catwpd for WordPerfect if
|
||
|
# Wp2html is unavailable or unable to convert.
|
||
|
#
|
||
|
# Uses range of other converters as available.
|
||
|
#
|
||
|
# If all else fails, attempts to read file without conversion.
|
||
|
#
|
||
|
########################################################################################
|
||
|
# Written by David Adams <d.j.adams@soton.ac.uk>.
|
||
|
# Based on conv_doc.pl written by Gilles Detillieux <grdetil@scrc.umanitoba.ca>,
|
||
|
# which in turn was based on the parse_word_doc.pl script, written by
|
||
|
# Jesse op den Brouw <MSQL_User@st.hhs.nl>.
|
||
|
########################################################################################
|
||
|
|
||
|
# Install Sys::AlarmCall if you can
|
||
|
eval "use Sys::AlarmCall";
|
||
|
|
||
|
######## Full paths of conversion utilities ##########
########        YOU MUST SET THESE          ##########
######## (leave null those you don't have)  ##########

# wp2html: Word & WordPerfect -> HTML
#   (get it from: http://www.res.bbsrc.ac.uk/wp2html/)
my $WP2HTML = '';

# catwpd: WordPerfect -> text
#   (not needed if you have wp2html; get it from the htdig site)
my $CATWPD = '';

# rtf2html: Rich Text Format -> HTML
#   (get it from http://www.ice.ru/~vitus/catdoc/)
my $RTF2HTML = '';

# catdoc: Microsoft Word -> plain text
#   (get it from: http://www.ice.ru/~vitus/catdoc/)
my $CATDOC  = '';          # version for Word6, Word7 & Word97 files
my $CATDOC2 = $CATDOC;     # version for Word2 files
my $CATDOCM = $CATDOC;     # version for Word 5.1 for MAC files

# PostScript -> text converter
#   (get it from the ghostscript 3.33 (or later) package)
my $CATPS = '';

# If needed, add the directory which contains gs to the search path:
#$ENV{PATH} .= ":/usr/freeware/bin";

# PDF -> HTML: full pathname of the pdf2html.pl script
my $PDF2HTML = '';

# xlHtml: Microsoft Excel -> HTML
#   (get it from www.xlhtml.org)
my $XLS2HTML = '';

# Microsoft Excel -> .CSV
#   (not needed if you have xlhtml; if wanted, it comes with catdoc)
my $CATXLS = '';

# Microsoft Powerpoint -> HTML
#   (get it from www.xlhtml.org)
my $PPT2HTML = '';

# Shockwave Flash (extracts links from file):
# full pathname of the swf2html.pl script
my $SWF2HTML = '';

# OpenOffice.org files, for example:
#my $OpenOffice2XML = '/usr/bin/unzip';
my $OpenOffice2XML = '';

# Optional pipeline stage appended to the OpenOffice command to remove
# multi-byte unicode from the XML in OOo documents, for example:
#my $strip_unicode = '| /usr/bin/iconv -c -s -f UTF-8 -t ISO-8859-1';
my $strip_unicode = '';
|
||
|
|
||
|
|
||
|
########################################################################
|
||
|
|
||
|
# Other Global Variables (all given their values by the subs below)

# Derived from the command line by init()
my ($Prog, $Input, $MIME_type, $URL, $Name);

# Logging and reporting of error output
my ($LOG, $Verbose, $Emark, $EEmark, $CORE_MESS, $Efile, $Maxerr, $Redir);

# Size and time limits
my ($IP_Limit, $OP_Limit, $Time);

# Working directory and helper commands
my ($TMP, $RM, $ED);

# State of the current conversion
my ($Success, $Count, $Method, $Magic);

# Registered methods, keyed by a descriptive type name
my (%HTML_Method, %TEXT_Method, %BAD_type);
|
||
|
|
||
|
|
||
|
# Main program: set up, then try each conversion strategy in turn and
# leave through quit() as soon as one of them has produced output.

init();                         # initialise

my $size = -s $Input;
if ($size >= $IP_Limit) {
    quit("Input file size of $size at or above $IP_Limit limit");
}

store_methods();                # register the configured converters
read_magic();                   # Magic reveals type
error_setup();                  # re-route standard error o/p from utilities

# see if a document -> HTML converter will work:
run('&try_html');
quit(0) if $Success;

# try a document -> text converter:
run('&try_text');
quit(0) if $Success;

# see if a known problem
my $fail = cannot_do();
quit($fail) if $fail;

# last-ditch attempt, try copying document
try_plain();
quit(0) if $Success;

quit("UNABLE to convert");
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# init() - one-time setup of the script's global state.
#
# Environment variables read:
#   DOC2HTML_LOG       file that STDERR is re-opened onto (append mode);
#                      if the variable exists at all, verbose output is on
#   DOC2HTML_IP_LIMIT  input size limit in bytes  (default 20000000)
#   DOC2HTML_OP_LIMIT  output size limit in bytes (default 10000000)
# Command line:
#   $ARGV[0]  input file name (required)
#   $ARGV[1]  MIME type       (required)
#   $ARGV[2]  URL of the document (optional, defaults to '?')
#
# Creates the working directory $TMP when it is missing and makes it the
# current directory.  Dies if a required argument is missing or if $TMP
# cannot be created.
sub init {

    # Doc2html log file
    $LOG = $ENV{'DOC2HTML_LOG'} || '';
    if ($LOG) {
        # Three-argument open, so the name is never parsed for a mode.
        # A failure to open is deliberately ignored.
        open(STDERR, '>>', $LOG);
    } # else O/P really does go to STDERR

    # Set to 1 for O/P to STDERR or Log file
    $Verbose = exists($ENV{'DOC2HTML_LOG'}) ? 1 : 0;

    # Limiting size of file doc2html.pl will try to process (default 20Mbyte)
    $IP_Limit = $ENV{'DOC2HTML_IP_LIMIT'} || 20000000;

    # Limit for O/P returned to htdig (default 10Mbyte)
    $OP_Limit = $ENV{'DOC2HTML_OP_LIMIT'} || 10000000;

    # Mark error message produced within doc2html script
    $Emark = "!\t";
    # Mark error message produced by conversion utility
    $EEmark = "!!\t";

    # Message to STDERR if core dump detected
    $CORE_MESS = "CORE DUMPED";

    # Directory for temporary files
    $TMP = "/tmp/htdig";
    if (! -d $TMP) {
        if (! mkdir($TMP, 0700)) {
            # Another run may have created the directory between the -d
            # test and mkdir; that is only an error if it still is missing.
            my $err = $!;
            die "Unable to create directory \"$TMP\": $err" unless -d $TMP;
        }
    }
    # Current directory during run of script:
    chdir $TMP or warn "Cannot change directory to $TMP\n";

    # File for error output from utility (relative to $TMP)
    $Efile = 'doc_err.' . $$;

    # Max. number of lines of error output from utility copied
    $Maxerr = 10;

    # System command to delete a file
    $RM = "/bin/rm -f";

    # Line editor to do substitution
    $ED = "/bin/sed -e";
    if ($^O eq "MSWin32") { $ED = "$^X -pe" }

    $Time = 60;     # allow 60 seconds for external utility to complete

    $Success = 0;
    $Count   = 0;
    $Method  = '';

    # Script name without directory or extension
    $Prog = $0;
    $Prog =~ s#^.*/##;
    $Prog =~ s/\..*?$//;

    $Input     = $ARGV[0] or die "No filename given\n";
    $MIME_type = $ARGV[1] or die "No MIME-type given";
    $URL       = $ARGV[2] || '?';

    # Document name: last path component of the URL, with %XX decoded
    $Name = $URL;
    $Name =~ s#^.*/##;
    $Name =~ s/%([A-F0-9][A-F0-9])/pack("C", hex($1))/gie;

    if ($Verbose and not $LOG) { print STDERR "\n$Prog: [$MIME_type] " }
    if ($LOG) { print STDERR "$URL [$MIME_type] " }

    return;
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# store_methods() - register the method of dealing with each file type.
#
# For every converter whose path variable is set, records a method through
# store_html_method() or store_text_method(): a descriptive name, the
# program, the full shell command line, a pattern for the MIME type and a
# pattern for the first bytes of the file.  Finally records the file
# signatures known to be unconvertible through store_cannot_do().
# Edit as necessary.
sub store_methods {

    # Document name, backslash-escaped for use inside the command lines
    my $title = quotemeta($Name);

    # Signature tested for by the Word, Excel and Powerpoint methods
    my $msoffice_magic = '^\320\317\021\340';
    # Signature tested for by both WordPerfect methods
    my $wpc_magic = '\377WPC';

    #### Document -> HTML converters ####

    # WordPerfect and Word documents share one wp2html command line
    if ($WP2HTML) {
        my $wp_cmd = "($WP2HTML -q -DTitle=\"[$title]\" -c doc2html.cfg -s doc2html.sty -i $Input -O; $RM CmdLine.ovr)";
        store_html_method('WordPerfect (wp2html)', $WP2HTML, $wp_cmd,
            "application/wordperfect|application/msword", $wpc_magic);
        store_html_method('Word (wp2html)', $WP2HTML, $wp_cmd,
            "application/msword", $msoffice_magic);
    }

    # RTF documents; rtf2html uses filename as title, change this
    if ($RTF2HTML) {
        store_html_method('RTF (rtf2html)', $RTF2HTML,
            "$RTF2HTML $Input | $ED \"s#^<TITLE>$Input</TITLE>#<TITLE>[$title]</TITLE>#\"",
            "application/msword|application/rtf|text/rtf",
            '^{\134rtf');
    }

    # Editor stage replacing the filename title written by xlHtml/pptHtml
    my $retitle = "$ED \"s#<TITLE>$Input</TITLE>#<TITLE>[$title]</TITLE>#\"";

    # Microsoft Excel spreadsheet
    if ($XLS2HTML) {
        store_html_method('Excel (xlHtml)', $XLS2HTML,
            "$XLS2HTML -fw $Input | $retitle",
            "application/msexcel|application/vnd.ms-excel",
            $msoffice_magic);
    }

    # Microsoft Powerpoint Presentation
    if ($PPT2HTML) {
        store_html_method('Powerpoint (pptHtml)', $PPT2HTML,
            "$PPT2HTML $Input | $retitle",
            "application/vnd.ms-powerpoint",
            $msoffice_magic);
    }

    # Adobe PDF file using Perl script, which is handed the MIME type and
    # the document name as extra arguments
    if ($PDF2HTML) {
        my $pdf_mime = "application/pdf";
        store_html_method('PDF (pdf2html)', $PDF2HTML,
            "$PDF2HTML $Input $pdf_mime $title",
            $pdf_mime,
            '%PDF-|\0PDF CARO\001\000\377');
    }

    # Shockwave Flash file using Perl script
    if ($SWF2HTML) {
        store_html_method('Shockwave-Flash (swf2html)', $SWF2HTML,
            "$SWF2HTML $Input",
            "application/x-shockwave-flash",
            '^FWS[\001-\010]');    # versions 1 to 5, perhaps some later versions
    }

    # OpenOffice Documents: extract content.xml and blank out the tags
    if ($OpenOffice2XML) {
        store_html_method('OpenOffice XML (oo2xml)', $OpenOffice2XML,
            "$OpenOffice2XML -p -qq $Input content.xml | /bin/sed -r 's/<[^>]*>/ /gi' $strip_unicode",
            "application/vnd.sun.xml.writer|application/vnd.sun.xml.impress|application/vnd.sun.xml.calc|application/vnd.sun.xml.draw|application/vnd.sun.xml.math",
            'PK');
    }

    #### Document -> Text converters ####

    # The three catdoc builds differ only in program and file signature;
    # the -b option increases chance of success
    foreach my $variant (
        [ 'Word (catdoc)',    $CATDOC,  $msoffice_magic ],                # Word6, Word7 & Word97
        [ 'Word2 (catdoc)',   $CATDOC2, '^\333\245-\000' ],               # Word2
        [ 'MACWord (catdoc)', $CATDOCM, '^\3767\000#\000\000\000\000' ],  # Word 5.1 for MAC
    ) {
        my ($label, $prog, $signature) = @$variant;
        next unless $prog;
        store_text_method($label, $prog, "$prog -a -b -w $Input",
            "application/msword", $signature);
    }

    # PostScript files; allow PS interpreter to give error messages
    if ($CATPS) {
        store_text_method('PostScript (ps2ascii)', $CATPS,
            "($CATPS; $RM _temp_.???) < $Input",
            "application/postscript",
            '^.{0,20}?%!|^\033%-12345.*\n%!');
    }

    # Microsoft Excel file
    if ($CATXLS) {
        store_text_method('MS Excel (xls2csv)', $CATXLS,
            "$CATXLS $Input",
            "application/vnd.ms-excel",
            $msoffice_magic);
    }

    # WordPerfect document
    if ($CATWPD) {
        store_text_method('WordPerfect (catwpd)', $CATWPD,
            "$CATWPD $Input",
            "application/wordperfect|application/msword",
            $wpc_magic);
    }

    #### Documents that cannot be converted ####

    # wrapped encapsulated Postscript
    store_cannot_do("EPS", '^\305\320\323\306 \0', 'wrapped Encapsulated Postscript');

    # Shockwave Flash version 6
    store_cannot_do("SWF6", '^CWS\006', 'Shockwave-Flash Version 6');

    # A crude test for binary data is kept here, disabled:
    ###store_cannot_do("BIN", '[\000-\007\016-\037\177]', 'apparently binary');

    return;
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# read_magic() - read the first bytes of the input file into $Magic.
#
# Up to 256 bytes of $Input are read; the method tables are later matched
# against them to recognise the file type.  Dies if the file cannot be
# opened.
sub read_magic {

    # Three-argument open: the name is taken literally, so blanks or mode
    # characters at either end of it cannot change what gets opened.
    open(my $fh, '<', $Input) || die "Can't open file $Input\n";

    # The signature is binary data; never let an I/O layer translate it.
    binmode($fh);

    read $fh, $Magic, 256;
    close $fh;

    return;
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# error_setup() - re-route standard error output into the file $Efile.
#
# The current STDERR is kept as the handle SAVERR so that quit() can
# restore it.  $Redir is set to 1 once the redirection is in place.
# Does nothing when $Efile is not set.  If the error file cannot be
# opened, STDERR is put back the way it was and $Redir stays unset.
sub error_setup {

    return unless $Efile;

    # Must be checked before the file is opened for writing, because the
    # open truncates it.
    my $stale = -s $Efile;

    # Without a saved copy of STDERR there is nothing to restore from
    # later, so leave STDERR alone in that case.
    open(SAVERR, ">&STDERR") or return;

    if (open(STDERR, '>', $Efile)) {
        print SAVERR " Overwriting $Efile\n" if $stale;
        $Redir = 1;
    } else {
        # The failed open has closed STDERR; reinstate the saved copy.
        open(STDERR, ">&SAVERR");
        close SAVERR;
    }

    return;
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# run($routine) - execute a conversion routine given as a string of Perl
# code such as '&try_html'.
#
# When alarm_call() is available the routine is run through it with a
# limit of $Time seconds and its return value is taken as the outcome.
# Otherwise the string is simply evaluated and any error left in $@ is
# the outcome.  A true outcome is passed to quit().
sub run {

    my $code = shift;
    my $outcome;

    if (defined &alarm_call) {
        $outcome = alarm_call($Time, $code);
    }
    else {
        eval $code;
        $outcome = $@ if $@;
    }

    quit($outcome) if $outcome;

}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# try_html() - attempt conversion with a document -> HTML converter.
#
# Takes the first entry of %HTML_Method whose MIME pattern matches
# $MIME_type and whose magic pattern matches $Magic, runs its command
# through a pipe and copies the output to standard output, leaving out
# lines that start with "<!--".  $Success becomes 1 as soon as the
# converter delivers a line; $Count accumulates the length of the lines
# printed and copying stops once it exceeds $OP_Limit.  If the program is
# not executable or the pipe cannot be opened, a warning is issued and the
# sub returns with $Success still 0.
sub try_html {

    $Success = 0;

    foreach my $kind (keys %HTML_Method) {
        my $spec = $HTML_Method{$kind};
        next unless $MIME_type =~ m/$spec->{'mime'}/i;
        next unless $Magic =~ m/$spec->{'magic'}/s;

        # found the method to use
        $Method = $kind;
        my $prog = $spec->{'cmnd'};

        unless (-x $prog) {
            warn "Unable to execute $prog for $kind document\n";
            return;
        }
        unless (open(CAT, "$spec->{'command'} |")) {
            warn "$prog doesn't want to be opened using pipe\n";
            return;
        }

        while (<CAT>) {
            # getting something, so it is working
            $Success = 1;
            next if m/^<!--/;    # skip comment lines inserted by converter
            print;
            $Count += length;
            last if $Count > $OP_Limit;
        }
        close CAT;
        last;
    }

    return;
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# try_text() - attempt conversion with a document -> text converter.
#
# Takes the first entry of %TEXT_Method whose MIME pattern matches
# $MIME_type and whose magic pattern matches $Magic, runs its command
# through a pipe and wraps the text it delivers in an HTML page
# (head(), then a <PRE> section).  $Success is set to 1 once the pipe is
# open; $Count accumulates the length of the lines printed and copying
# stops once it exceeds $OP_Limit.  Dies if the program is not executable
# or the pipe cannot be opened.
sub try_text {

    $Success = 0;

    foreach my $kind (keys %TEXT_Method) {
        my $spec = $TEXT_Method{$kind};
        next unless $MIME_type =~ m/$spec->{'mime'}/i;
        next unless $Magic =~ m/$spec->{'magic'}/s;

        # found the method to use
        $Method = $kind;
        my $prog = $spec->{'cmnd'};
        die "Unable to execute $prog for $kind document\n" unless -x $prog;

        # Open file via selected converter, output head, then its text:
        open(CAT, "$spec->{'command'} |")
            or die "$prog doesn't want to be opened using pipe\n";
        head();
        print "<BODY>\n<PRE>\n";
        $Success = 1;

        while (<CAT>) {
            s/\255/-/g;             # replace dashes with hyphens
            # replace bell, backspace, tab. etc. with single space:
            s/[\000-\040]+/ /g;
            next unless length > 1; # nothing but a single character, eg space
            print &HTML($_), "\n";
            $Count += length;
            last if $Count > $OP_Limit;
        }
        close CAT;

        print "</PRE>\n</BODY>\n</HTML>\n";
        last;
    }

    return;
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# cannot_do() - see if the file is of a known, unconvertible type.
#
# Clears $Method, then matches $Magic against the magic pattern of every
# entry in %BAD_type.  Returns the message "CANNOT DO <description> " for
# the first entry that matches, or 0 when none does.
sub cannot_do {

    $Method = '';

    foreach my $known (values %BAD_type) {
        # known problem
        return "CANNOT DO $known->{'desc'} " if $Magic =~ m/$known->{'magic'}/s;
    }

    return 0;
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# try_plain() - last resort: copy the input as plain text.
#
# If the -T test judges $Input to be a text file, it is written out as an
# HTML page (head(), then a <PRE> section) with each run of control
# characters and blanks reduced to a single space.  On that path $Method
# is 'Plain Text' and $Success is 1; otherwise $Method is cleared and
# $Success stays 0.  $Count accumulates the length of the lines printed
# and copying stops once it exceeds $OP_Limit.  Dies if the file cannot
# be opened.
sub try_plain {

    $Success = 0;

    unless (-T $Input) {
        $Method = '';
        return;
    }

    # Looks like text, so go for it:
    $Method = 'Plain Text';

    # Three-argument open with a lexical handle: the file name is taken
    # literally and the handle cannot be left open by accident.
    open(my $in, '<', $Input) || die "Error reading $Input\n";
    $Success = 1;
    head();
    print "<BODY>\n<PRE>\n";

    while (<$in>) {
        # replace bell, backspace, tab. etc. with single space:
        s/[\000-\040\177]+/ /g;
        next unless length > 1;     # skip lines that held nothing else
        print &HTML($_), "\n";
        $Count += length;
        last if $Count > $OP_Limit;
    }
    close $in;
    print "</PRE>\n</BODY>\n</HTML>\n";

    return;
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# HTML($text) - make a line of plain text safe to embed in HTML.
#
# Every run of white space (form feeds and newlines included) becomes a
# single space, trailing white space is removed, and the characters
# & < > are replaced by their entities.  Returns the converted text; an
# undefined argument gives the empty string.
sub HTML {

    my $text = shift;
    return '' unless defined $text;

    $text =~ s/\s+/ /g;     # replace multiple spaces, etc. with a single space
    $text =~ s/\s+\z//;     # remove trailing spaces

    # The ampersand must be done first, or the entities produced for
    # < and > would themselves be escaped.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    return $text;
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# store_html_method($type, $cmnd, $cline, $mime, $magic)
#
# Records a document -> HTML method in %HTML_Method under the key $type:
# the program ($cmnd), the complete command line ($cline), the pattern
# for the MIME type ($mime) and the pattern for the file's first bytes
# ($magic).  An existing entry of the same name is replaced.
sub store_html_method {

    my ($label, $program, $command_line, $mime_pattern, $magic_pattern) = @_;

    $HTML_Method{$label} = {
        'mime'    => $mime_pattern,
        'magic'   => $magic_pattern,
        'cmnd'    => $program,
        'command' => $command_line,
    };

    return;
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# store_text_method($type, $cmnd, $cline, $mime, $magic)
#
# Records a document -> text method in %TEXT_Method under the key $type:
# the program ($cmnd), the complete command line ($cline), the pattern
# for the MIME type ($mime) and the pattern for the file's first bytes
# ($magic).  An existing entry of the same name is replaced.
sub store_text_method {

    my ($label, $program, $command_line, $mime_pattern, $magic_pattern) = @_;

    $TEXT_Method{$label} = {
        'mime'    => $mime_pattern,
        'magic'   => $magic_pattern,
        'cmnd'    => $program,
        'command' => $command_line,
    };

    return;
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# store_cannot_do($type, $magic, $desc)
#
# Records in %BAD_type, under the key $type, a kind of file that cannot
# be converted: the pattern for the file's first bytes ($magic) and a
# description ($desc) for the error message.
sub store_cannot_do {

    my ($label, $magic_pattern, $description) = @_;

    $BAD_type{$label} = {
        'magic' => $magic_pattern,
        'desc'  => $description,
    };

    return;
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# head() - print the opening of the HTML page: <HTML>, and a <HEAD>
# section whose title is the document name $Name in square brackets.
#
# $Name comes from the document's URL, so the characters & < > in it are
# replaced by their entities to keep the title from breaking the markup.
sub head {

    my $title = defined $Name ? $Name : '';
    $title =~ s/&/&amp;/g;      # first, so later entities stay intact
    $title =~ s/</&lt;/g;
    $title =~ s/>/&gt;/g;

    print "<HTML>\n<HEAD>\n";
    print "<TITLE>[" . $title . "]</TITLE>\n";
    print "</HEAD>\n";

    return;
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
# quit($return) - report the outcome, tidy up and leave the script.
#
# Puts STDERR back if error_setup() redirected it, then writes to STDERR:
# the method used and the output count (verbose mode, after a success),
# a note if the output limit was exceeded, the message $return if it is
# true, up to $Maxerr lines of the utilities' error output collected in
# $Efile, and a notice if a file "core" has appeared since the script
# started.  The error file is deleted.  Exits with status 1 when $return
# is true, otherwise with $return itself.
sub quit {

    my $status = shift;

    if ($Redir) {   # end redirection of STDERR to temporary file
        close STDERR;
        open STDERR, ">&SAVERR";
    }

    if ($Verbose) {
        print STDERR "$Method $Count" if $Success;
        print STDERR "\n";
    }

    print STDERR $Emark, "Output truncated after limit $OP_Limit reached\n"
        if $Count > $OP_Limit;

    if ($status) {
        print STDERR $Emark, $status, "\n";
        $status = 1;
    }

    # The error file name is relative to the working directory
    chdir $TMP;
    if ($Efile && -s $Efile) {
        open EFILE, "<$Efile";
        my $lines = 0;
        while (<EFILE>) {
            # copy only the first $Maxerr lines, but count them all
            print STDERR $EEmark, $_ if ++$lines <= $Maxerr;
        }
        close EFILE;
        print STDERR $Emark, " ... (total of $lines lines of error messages)\n"
            if $lines > $Maxerr;
    }
    unlink $Efile if $Efile && -e $Efile;

    # A negative age means the file was modified after the script started
    if (-e "core" && (-M "core" < 0)) {
        print STDERR $Emark, "$CORE_MESS\n";
    }

    exit $status;
}
|