You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
566 lines
16 KiB
566 lines
16 KiB
/*
   Copyright (C) 2001 Andreas Schlapbach <schlpbch@iam.unibe.ch>
   Copyright (C) 2003 Antonio Larrosa <larrosa@kde.org>

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.
*/
|
|
|
|
#include "archivedialog.h"
|
|
#include <tqwidget.h>
|
|
#include <khtml_part.h>
|
|
#include "archiveviewbase.h"
|
|
#include <kinstance.h>
|
|
#include <ktempfile.h>
|
|
#include <ktar.h>
|
|
|
|
#include <kfiledialog.h>
|
|
#include <kmessagebox.h>
|
|
#include <kpassivepopup.h>
|
|
#include <klocale.h>
|
|
#include <kio/netaccess.h>
|
|
#include <khtml_part.h>
|
|
#include <kdebug.h>
|
|
#include <kgenericfactory.h>
|
|
#include <kactivelabel.h>
|
|
#include <tqstylesheet.h>
|
|
#include <tqiodevice.h>
|
|
#include <klistview.h>
|
|
#include <kio/job.h>
|
|
#include <kapplication.h>
|
|
#include <kurllabel.h>
|
|
#include <kprogress.h>
|
|
#include <kstringhandler.h>
|
|
#include <tqpushbutton.h>
|
|
|
|
#undef DEBUG_WAR
|
|
|
|
#define CONTENT_TYPE "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">"
|
|
|
|
/*
 * Sets up the "Web Archiver" dialog for the page currently shown in `part`.
 *
 * parent   - parent widget handed to KDialogBase
 * filename - target archive; it is wrapped in a KTar object using the
 *            "application/x-gzip" mime type
 * part     - the KHTML part whose URL and document will be archived
 *
 * Only the user interface and the KTar object are prepared here; the tar
 * ball is opened later, in archive().
 */
ArchiveDialog::ArchiveDialog(TQWidget *parent, const TQString &filename,
                             KHTMLPart *part) :
  KDialogBase(parent, "WebArchiveDialog", false, i18n("Web Archiver"),
              KDialogBase::Ok | KDialogBase::Cancel | KDialogBase::User1 ),
  m_bPreserveWS(false), m_tmpFile(0), m_url(part->url())
{
  m_widget = new ArchiveViewBase(this);
  setMainWidget(m_widget);
  setWFlags(getWFlags() | WDestructiveClose);

  // Both labels show a (possibly squeezed) text that links to the full value.
  const TQString anchorOpen("<a href=\"");
  const TQString pageAddress(m_url.url());
  m_widget->urlLabel->setText(anchorOpen + pageAddress + "\">"
                              + KStringHandler::csqueeze( pageAddress, 80 ) + "</a>");
  m_widget->targetLabel->setText(anchorOpen + filename + "\">"
                                 + KStringHandler::csqueeze( filename, 80 ) + "</a>");

  // Prefer the owner document when the part's document reports one.
  if (!part->document().ownerDocument().isNull())
    m_document = part->document().ownerDocument();
  else
    m_document = part->document();

  m_tarBall = new KTar(filename, "application/x-gzip");

  // OK stays disabled until archiving has finished; it is labelled "Close".
  enableButtonOK( false );
  showButton( KDialogBase::User1, false );
  setButtonOK( KStdGuiItem::close() );
}
|
|
|
|
void ArchiveDialog::archive()
|
|
{
|
|
m_iterator=0;
|
|
m_currentLVI=0;
|
|
if (m_tarBall->open(IO_WriteOnly)) {
|
|
#ifdef DEBUG_WAR
|
|
kdDebug(90110) << "Web Archive opened " << endl;
|
|
#endif
|
|
|
|
m_linkDict.insert(TQString("index.html"), TQString(""));
|
|
saveFile("index.html");
|
|
|
|
} else {
|
|
const TQString title = i18n( "Unable to Open Web-Archive" );
|
|
const TQString text = i18n( "Unable to open \n %1 \n for writing." ).arg(m_tarBall->fileName());
|
|
KMessageBox::sorry( 0L, text, title );
|
|
}
|
|
}
|
|
|
|
/*
 * Releases the objects owned by the dialog.
 *
 * m_tmpFile is non-null between downloadNext() and the matching
 * finishedDownloadingURL(); if the dialog is destroyed in that window the
 * temporary file object would otherwise leak, so it is unlinked and deleted
 * here as well.
 */
ArchiveDialog::~ArchiveDialog()
{
  if (m_tmpFile) {
    m_tmpFile->unlink();
    delete m_tmpFile;
    m_tmpFile = 0;
  }

  delete m_tarBall;
}
|
|
|
|
/* Store the HTMLized DOM-Tree to a temporary file and add it to the Tar-Ball */
|
|
|
|
void ArchiveDialog::saveFile( const TQString&)
|
|
{
|
|
KTempFile tmpFile;
|
|
if (!(tmpFile.status())) {
|
|
|
|
TQString temp;
|
|
|
|
m_state=Retrieving;
|
|
TQTextStream *tempStream = new TQTextStream(&temp, IO_ReadOnly);
|
|
|
|
saveToArchive(tempStream);
|
|
|
|
delete tempStream;
|
|
|
|
m_downloadedURLDict.clear();
|
|
|
|
m_state=Downloading;
|
|
m_widget->progressBar->setTotalSteps(m_urlsToDownload.count());
|
|
m_widget->progressBar->setProgress(0);
|
|
downloadNext();
|
|
|
|
} else {
|
|
const TQString title = i18n( "Could Not Open Temporary File" );
|
|
const TQString text = i18n( "Could not open a temporary file" );
|
|
KMessageBox::sorry( 0, text, title );
|
|
}
|
|
}
|
|
|
|
void ArchiveDialog::setSavingState()
|
|
{
|
|
KTempFile tmpFile;
|
|
TQTextStream* textStream = tmpFile.textStream();
|
|
textStream->setEncoding(TQTextStream::UnicodeUTF8);
|
|
|
|
m_widget->progressBar->setProgress(m_widget->progressBar->totalSteps());
|
|
|
|
m_state=Saving;
|
|
saveToArchive(textStream);
|
|
|
|
tmpFile.close();
|
|
|
|
TQString fileName="index.html";
|
|
TQFile file(tmpFile.name());
|
|
file.open(IO_ReadOnly);
|
|
m_tarBall->writeFile(fileName, TQString(), TQString(), file.size(), file.readAll());
|
|
#ifdef DEBUG_WAR
|
|
kdDebug(90110) << "HTML-file written: " << fileName << endl;
|
|
#endif
|
|
file.close();
|
|
|
|
// Cleaning up
|
|
file.remove();
|
|
m_tarBall->close();
|
|
|
|
KPassivePopup::message( m_url.prettyURL() , i18n( "Archiving webpage completed." ), this );
|
|
|
|
enableButtonOK(true);
|
|
setEscapeButton(Ok);
|
|
actionButton(Ok)->setFocus();
|
|
enableButtonCancel(false);
|
|
}
|
|
|
|
/* Recursively traverse the DOM-Tree */
|
|
|
|
/*
 * Writes a "saved from" HTML comment naming the page URL to the stream and
 * then serializes the whole document, starting at its document element.
 * Does nothing when no stream is given. Any exception escaping from the
 * traversal is caught here and only reported through kdDebug.
 */
void ArchiveDialog::saveToArchive(TQTextStream* _textStream)
{
  if (_textStream == 0)
    return;

  TQTextStream &out = *_textStream;
  out << "<!-- saved from:" << endl << m_url.url() << " -->" << endl;

  try
  {
    saveArchiveRecursive(m_document.documentElement(), m_url, _textStream, 0);
  }
  catch (...)
  {
    kdDebug(90110) << "exception" << endl;
  }
}
|
|
|
|
/*
 * Returns true if `pNode` carries an attribute whose upper-cased name equals
 * `attrName` and whose upper-cased value equals `attrValue`. Both arguments
 * are therefore expected in upper case.
 */
static bool hasAttribute(const DOM::Node &pNode, const TQString &attrName, const TQString &attrValue)
{
  const DOM::Element asElement = static_cast<const DOM::Element>(pNode);
  DOM::NamedNodeMap attributeMap = asElement.attributes();
  const unsigned long attributeCount = attributeMap.length();

  unsigned int index = 0;
  while (index < attributeCount) {
    DOM::Attr candidate = static_cast<DOM::Attr>(attributeMap.item(index));
    ++index;

    // The value is only looked at once the name matches.
    if (candidate.name().string().upper() != attrName)
      continue;
    if (candidate.value().string().upper() == attrValue)
      return true;
  }
  return false;
}
|
|
|
|
/*
 * Returns true if one of the direct children of `pNode` has a node name that
 * equals `nodeName` once upper-cased (so `nodeName` is expected in upper
 * case). Only direct children are inspected.
 */
static bool hasChildNode(const DOM::Node &pNode, const TQString &nodeName)
{
  DOM::Node current;
  try
  {
    // Fetching the first child might throw a DOM exception
    current = pNode.firstChild();
  }
  catch (...)
  {
    // Treat that as "no children"
    current = DOM::Node();
  }

  for (; !current.isNull(); current = current.nextSibling()) {
    if (current.nodeName().string().upper() == nodeName)
      return true;
  }
  return false;
}
|
|
|
|
/* Transform DOM-Tree to HTML */
|
|
|
|
void ArchiveDialog::saveArchiveRecursive(const DOM::Node &pNode, const KURL& baseURL,
|
|
TQTextStream* _textStream, int indent)
|
|
{
|
|
const TQString nodeNameOrig(pNode.nodeName().string());
|
|
const TQString nodeName(pNode.nodeName().string().upper());
|
|
TQString text;
|
|
TQString strIndent;
|
|
strIndent.fill(' ', indent);
|
|
const DOM::Element element = (const DOM::Element) pNode;
|
|
DOM::Node child;
|
|
|
|
if ( !element.isNull() ) {
|
|
if (nodeName.at(0)=='-') {
|
|
/* Don't save khtml internal tags '-konq..'
|
|
* Approximating it with <DIV>
|
|
*/
|
|
text += "<DIV> <!-- -KONTQ_BLOCK -->";
|
|
} else if (nodeName == "BASE") {
|
|
/* Skip BASE, everything is relative to index.html
|
|
* Saving SCRIPT but they can cause trouble!
|
|
*/
|
|
} else if ((nodeName == "META") && hasAttribute(pNode, "HTTP-ETQUIV", "CONTENT-TYPE")) {
|
|
/* Skip content-type meta tag, we provide our own.
|
|
*/
|
|
} else {
|
|
if (!m_bPreserveWS) {
|
|
if (nodeName == "PRE") {
|
|
m_bPreserveWS = true;
|
|
}
|
|
text = strIndent;
|
|
}
|
|
text += "<" + nodeNameOrig;
|
|
TQString attributes;
|
|
TQString attrNameOrig, attrName, attrValue;
|
|
DOM::Attr attr;
|
|
DOM::NamedNodeMap attrs = element.attributes();
|
|
unsigned long lmap = attrs.length();
|
|
for( unsigned int j=0; j<lmap; j++ ) {
|
|
attr = static_cast<DOM::Attr>(attrs.item(j));
|
|
attrNameOrig = attr.name().string();
|
|
attrName = attrNameOrig.upper();
|
|
attrValue = attr.value().string();
|
|
|
|
#if 0
|
|
if ((nodeName == "FRAME" || nodeName == "IFRAME") && attrName == "SRC") {
|
|
//attrValue = handleLink(baseURL, attrValue);
|
|
|
|
/* Going recursively down creating a DOM-Tree for the Frame, second Level of recursion */
|
|
//## Add Termination criteria, on the other hand frames are not indefinetly nested, are they :)
|
|
|
|
KHTMLPart* part = new KHTMLPart();
|
|
KURL absoluteURL = getAbsoluteURL(baseURL, attrValue);
|
|
part->openURL(absoluteURL);
|
|
saveFile(getUniqueFileName(absoluteURL.fileName()), part);
|
|
delete part;
|
|
|
|
} else if
|
|
#endif
|
|
if ((nodeName == "LINK" && attrName == "HREF") || // Down load stylesheets, js-script, ..
|
|
((nodeName == "FRAME" || nodeName == "IFRAME") && attrName == "SRC") ||
|
|
((nodeName == "IMG" || nodeName == "INPUT" || nodeName == "SCRIPT") && attrName == "SRC") ||
|
|
((nodeName == "BODY" || nodeName == "TABLE" || nodeName == "TH" || nodeName == "TD") && attrName == "BACKGROUND")) {
|
|
// Some people use carriage return in file names and browsers support that!
|
|
attrValue = handleLink(baseURL, attrValue.replace(TQRegExp("\\s"), ""));
|
|
}
|
|
/*
|
|
* ## Make recursion level configurable
|
|
*/
|
|
/*
|
|
} else if (nodeName == "A" && attrName == "HREF") {
|
|
attrValue = handleLink(baseURL, attrValue);
|
|
*/
|
|
|
|
attributes += " " + attrName + "=\"" + attrValue + "\"";
|
|
}
|
|
if (!(attributes.isEmpty())){
|
|
text += " ";
|
|
}
|
|
text += attributes.simplifyWhiteSpace();
|
|
text += ">";
|
|
|
|
if (nodeName == "HTML") {
|
|
/* Search for a HEAD tag, if not found, generate one.
|
|
*/
|
|
if (!hasChildNode(pNode, "HEAD"))
|
|
text += "\n" + strIndent + " <HEAD>" CONTENT_TYPE "</HEAD>";
|
|
}
|
|
else if (nodeName == "HEAD") {
|
|
text += "\n" + strIndent + " " + CONTENT_TYPE;
|
|
}
|
|
}
|
|
} else {
|
|
const TQString& nodeValue(pNode.nodeValue().string());
|
|
if (!(nodeValue.isEmpty())) {
|
|
// Don't escape < > in JS or CSS
|
|
TQString parentNodeName = pNode.parentNode().nodeName().string().upper();
|
|
if (parentNodeName == "STYLE") {
|
|
text = analyzeInternalCSS(baseURL, pNode.nodeValue().string());
|
|
} else if (m_bPreserveWS) {
|
|
text = TQStyleSheet::escape(pNode.nodeValue().string());
|
|
} else if (parentNodeName == "SCRIPT") {
|
|
text = pNode.nodeValue().string();
|
|
} else {
|
|
text = strIndent + TQStyleSheet::escape(pNode.nodeValue().string());
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG_WAR
|
|
kdDebug(90110) << "text:" << text << endl;
|
|
#endif
|
|
if (!(text.isEmpty())) {
|
|
(*_textStream) << text;
|
|
if (!m_bPreserveWS) {
|
|
(*_textStream) << endl;
|
|
}
|
|
}
|
|
|
|
try
|
|
{
|
|
// We might throw a DOM exception
|
|
child = pNode.firstChild();
|
|
}
|
|
catch (...)
|
|
{
|
|
// No children, stop recursion here
|
|
child = DOM::Node();
|
|
}
|
|
|
|
while(!child.isNull()) {
|
|
saveArchiveRecursive(child, baseURL, _textStream, indent+2);
|
|
child = child.nextSibling();
|
|
}
|
|
|
|
if (!(element.isNull())) {
|
|
if (nodeName == "AREA" || nodeName == "BASE" || nodeName == "BASEFONT" ||
|
|
nodeName == "BR" || nodeName == "COL" || nodeName == "FRAME" ||
|
|
nodeName == "HR" || nodeName == "IMG" || nodeName == "INPUT" ||
|
|
nodeName == "ISINDEX" || nodeName == "META" || nodeName == "PARAM") {
|
|
|
|
/* Closing Tag is forbidden, see HTML 4.01 Specs: Index of Elements */
|
|
|
|
} else {
|
|
if (!m_bPreserveWS) {
|
|
text = strIndent;
|
|
} else {
|
|
text ="";
|
|
}
|
|
if (nodeName.at(0)=='-') {
|
|
text += "</DIV> <!-- -KONTQ_BLOCK -->";
|
|
} else {
|
|
text += "</" + pNode.nodeName().string() + ">";
|
|
if (nodeName == "PRE") {
|
|
m_bPreserveWS = false;
|
|
}
|
|
}
|
|
#ifdef DEBUG_WAR
|
|
kdDebug(90110) << text << endl;
|
|
#endif
|
|
if (!(text.isEmpty())) {
|
|
(*_textStream) << text;
|
|
if (!m_bPreserveWS) {
|
|
(*_textStream) << endl;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Extract the URL, download its content and return a unique name for the link */
|
|
|
|
/*
 * Resolves `_link` against `_url` and processes the result according to the
 * current state:
 *   Retrieving - the absolute URL is appended to the list of URLs to download
 *   Saving     - the name recorded for that URL in m_downloadedURLDict is
 *                returned
 * In every other case, and when kapp does not authorize the "redirect" from
 * `_url` to the resolved URL, a default-constructed string is returned.
 */
TQString ArchiveDialog::handleLink(const KURL& _url, const TQString& _link)
{
  KURL target(getAbsoluteURL(_url, _link));

  TQString archivedName;
  if (!kapp->authorizeURLAction("redirect", _url, target))
    return archivedName;

  if (m_state==Retrieving) {
    m_urlsToDownload.append(target);
  } else if (m_state==Saving) {
    archivedName = m_downloadedURLDict[target.url()];
  }

  return archivedName;
}
|
|
|
|
void ArchiveDialog::downloadNext()
|
|
{
|
|
if (m_iterator>=m_urlsToDownload.count())
|
|
{
|
|
// We've already downloaded all the files we wanted, let's save them
|
|
setSavingState();
|
|
return;
|
|
}
|
|
|
|
KURL url=m_urlsToDownload[m_iterator];
|
|
|
|
#ifdef DEBUG_WAR
|
|
kdDebug(90110) << "URL : " << url.url() << endl;
|
|
#endif
|
|
TQString tarFileName;
|
|
|
|
// Only download file once
|
|
if (m_downloadedURLDict.contains(url.url())) {
|
|
tarFileName = m_downloadedURLDict[url.url()];
|
|
#ifdef DEBUG_WAR
|
|
kdDebug(90110) << "File already downloaded: " << url.url()
|
|
<< m_downloadedURLDict.count() << endl;
|
|
#endif
|
|
m_iterator++;
|
|
downloadNext();
|
|
return;
|
|
} else {
|
|
|
|
// Gets the name of a temporary file into m_tmpFileName
|
|
delete m_tmpFile;
|
|
m_tmpFile=new KTempFile();
|
|
m_tmpFile->close();
|
|
TQFile::remove(m_tmpFile->name());
|
|
kdDebug(90110) << "downloading: " << url.url() << " to: " << m_tmpFile->name() << endl;
|
|
KURL dsturl;
|
|
dsturl.setPath(m_tmpFile->name());
|
|
KIO::Job *job=KIO::file_copy(url, dsturl, -1, false, false, false);
|
|
job->addMetaData("cache", "cache"); // Use entry from cache if available.
|
|
connect(job, TQT_SIGNAL(result( KIO::Job *)), this, TQT_SLOT(finishedDownloadingURL( KIO::Job *)) );
|
|
|
|
m_currentLVI=new TQListViewItem(m_widget->listView, url.prettyURL());
|
|
m_widget->listView->insertItem( m_currentLVI );
|
|
m_currentLVI->setText(1,i18n("Downloading"));
|
|
}
|
|
#ifdef DEBUG_WAR
|
|
kdDebug(90110) << "TarFileName: [" << tarFileName << "]" << endl << endl;
|
|
#endif
|
|
}
|
|
|
|
/*
 * Slot connected to the result signal of the copy job started in
 * downloadNext(). Marks the current list view row as "Error" or "Ok",
 * advances the progress bar, stores the content of the temporary file in the
 * tar ball under a unique name, records that name for the URL and moves on
 * to the next URL.
 */
void ArchiveDialog::finishedDownloadingURL( KIO::Job *job )
{
  if ( job->error() )
    m_currentLVI->setText(1,i18n("Error"));
  else
    m_currentLVI->setText(1,i18n("Ok"));

  m_widget->progressBar->advance(1);

  KURL source=m_urlsToDownload[m_iterator];
  TQString archivedName = getUniqueFileName(source.fileName());

  // Add file to Tar-Ball
  TQFile downloaded(m_tmpFile->name());
  downloaded.open(IO_ReadOnly);
  m_tarBall->writeFile(archivedName, TQString(), TQString(), downloaded.size(), downloaded.readAll());
  downloaded.close();

  // The temporary file is no longer needed
  m_tmpFile->unlink();
  delete m_tmpFile;
  m_tmpFile=0;

  // Remember which name the URL got and mark that name as taken
  m_downloadedURLDict.insert(source.url(), archivedName);
  m_linkDict.insert(archivedName, TQString(""));

  m_iterator++;
  downloadNext();
}
|
|
|
|
/* Create an absolute URL for download */
|
|
|
|
/*
 * Returns the URL obtained by constructing a KURL from the base URL `_url`
 * and the (possibly relative) `_link`.
 */
KURL ArchiveDialog::getAbsoluteURL(const KURL& _url, const TQString& _link)
{
  // KURL's (base, relative) constructor does the resolving
  KURL resolved(_url, _link);
  return resolved;
}
|
|
|
|
/* Adds an id to a fileName to make it unique relative to the Tar-Ball */
|
|
|
|
/*
 * Returns `fileName` if it is non-empty and not yet a key of m_linkDict.
 * Otherwise a number is put in front of `fileName`, taking the next value of
 * a function-local static counter (starting at 2) for every attempt, until
 * the result is neither empty nor already in m_linkDict.
 */
TQString ArchiveDialog::getUniqueFileName(const TQString& fileName)
{
  // Source of the ids used to resolve name clashes
  static int id=2;
  TQString candidate(fileName);

#ifdef DEBUG_WAR
  kdDebug(90110) << "getUniqueFileName(..): [" << fileName << "]" << endl;
#endif

  for (;;) {
    if (!candidate.isEmpty() && !m_linkDict.contains(candidate))
      break;
    candidate = TQString::number(id++) + fileName;
  }

  return candidate;
}
|
|
|
|
/* Search for Images in CSS, extract them and adjust CSS */
|
|
|
|
/*
 * Passes every `url(...)` reference found in the CSS text `string` through
 * handleLink() and returns a copy of the text in which each reference is
 * replaced by handleLink()'s result.
 *
 * _url   - base URL the references are resolved against
 * string - CSS text to scan
 *
 * A single or double quote directly after "url(" and directly before the
 * closing ")" is not treated as part of the reference. Scanning stops at a
 * "url(" that has no closing ")"; the text from there on is left untouched.
 */
TQString ArchiveDialog::analyzeInternalCSS(const KURL& _url, const TQString& string)
{
#ifdef DEBUG_WAR
  kdDebug () << "analyzeInternalCSS" << endl;
#endif

  TQString str(string);
  int pos = 0;
  // str changes length while it is being rewritten, so the bound has to be
  // taken from str itself on every round.
  while (pos >= 0 && pos < static_cast<int>(str.length())) {
    pos = str.find("url(", pos);
    if (pos == -1)
      break;
    pos += 4; // url(

    if (str[pos]=='"' || str[pos]=='\'') // CSS 'feature'
      pos++;
    const int startUrl = pos;

    const int closeParen = str.find(")", startUrl);
    if (closeParen == -1)
      break; // unterminated url( - nothing sensible left to rewrite

    int endUrl = closeParen;
    // Only strip a closing quote that lies inside the reference; for
    // something like url(") there is none.
    if (endUrl > startUrl && (str[endUrl-1]=='"' || str[endUrl-1]=='\'')) // CSS 'feature'
      endUrl--;

    TQString url = str.mid(startUrl, endUrl-startUrl);

#ifdef DEBUG_WAR
    kdDebug () << "url: " << url << endl;
#endif

    url = handleLink(_url, url);

#ifdef DEBUG_WAR
    kdDebug () << "url: " << url << endl;
#endif

    str = str.replace(startUrl, endUrl-startUrl, url);

    // Continue right behind the text just inserted; positions computed
    // before the replacement are no longer valid.
    pos = startUrl + static_cast<int>(url.length());
  }
  return str;
}
|
|
|
|
#include "archivedialog.moc"
|