You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
416 lines
13 KiB
416 lines
13 KiB
/***************************************************************************
|
|
copyright : (C) 2006 by Robby Stephenson
|
|
email : robby@periapsis.org
|
|
***************************************************************************/
|
|
|
|
/***************************************************************************
|
|
* *
|
|
* This program is free software; you can redistribute it and/or modify *
|
|
* it under the terms of version 2 of the GNU General Public License as *
|
|
* published by the Free Software Foundation; *
|
|
* *
|
|
***************************************************************************/
|
|
|
|
#include "ibsfetcher.h"
|
|
#include "messagehandler.h"
|
|
#include "../tellico_kernel.h"
|
|
#include "../tellico_utils.h"
|
|
#include "../collections/bookcollection.h"
|
|
#include "../entry.h"
|
|
#include "../filehandler.h"
|
|
#include "../latin1literal.h"
|
|
#include "../imagefactory.h"
|
|
#include "../tellico_debug.h"
|
|
|
|
#include <klocale.h>
|
|
#include <kconfig.h>
|
|
#include <kio/job.h>
|
|
|
|
#include <tqregexp.h>
|
|
#include <tqlayout.h>
|
|
#include <tqlabel.h>
|
|
#include <tqfile.h>
|
|
|
|
//#define IBS_TEST
|
|
|
|
namespace {
  // Address every search request starts from; search() appends the query
  // items to it and swaps the file name for ISBN lookups.
  const char* const IBS_BASE_URL = "http://www.internetbookshop.it/ser/serpge.asp";
}
|
|
|
|
using Tellico::Fetch::IBSFetcher;
|
|
|
|
/**
 * Creates an idle fetcher: no search is running and no transfer job exists.
 *
 * @param parent_ parent object, forwarded to the Fetcher base class
 * @param name_   object name, forwarded to the Fetcher base class
 */
IBSFetcher::IBSFetcher(TQObject* parent_, const char* name_ /*=0*/)
    : Fetcher(parent_, name_), m_job(0), m_started(false) {
  // m_job is cleared explicitly because stop() tests it, and search() can
  // reach stop() (unsupported collection type, unknown key) before any
  // transfer job has been created.
}
|
|
|
|
/**
 * @return the translated, human-readable name of this data source
 */
TQString IBSFetcher::defaultName() {
  const TQString label = i18n("Internet Bookshop (ibs.it)");
  return label;
}
|
|
|
|
/**
 * @return m_name when one has been set, otherwise the default name
 */
TQString IBSFetcher::source() const {
  if(m_name.isEmpty()) {
    return defaultName();
  }
  return m_name;
}
|
|
|
|
bool IBSFetcher::canFetch(int type) const {
|
|
return type == Data::Collection::Book || type == Data::Collection::Bibtex;
|
|
}
|
|
|
|
/**
 * Configuration hook; intentionally does nothing because no setting is read
 * from the group.
 *
 * @param config_ configuration group (ignored)
 */
void IBSFetcher::readConfigHook(const KConfigGroup& config_) {
  // nothing to load, just keep the compiler quiet about the parameter
  Q_UNUSED(config_);
}
|
|
|
|
/**
 * Starts an asynchronous search on the IBS web site.
 *
 * Title, Person and Keyword searches all use the "keyword" query type and
 * differ only in the parameter that carries the search text. ISBN searches
 * go to a different page and are completed by slotCompleteISBN() instead of
 * slotComplete(). For an unsupported collection type or an unrecognized key
 * the search is ended right away through stop().
 *
 * @param key_   which kind of search to run
 * @param value_ the text to search for
 */
void IBSFetcher::search(FetchKey key_, const TQString& value_) {
  m_started = true;
  m_matches.clear();

#ifdef IBS_TEST
  KURL u = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/ibs.html"));
#else
  KURL u(TQString::fromLatin1(IBS_BASE_URL));

  if(!canFetch(Kernel::self()->collectionType())) {
    message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
    stop();
    return;
  }

  // query parameter that carries the text of a keyword-style search;
  // stays null for ISBN lookups, which build their own query
  const char* textParam = 0;
  switch(key_) {
    case Title:
      textParam = "T";
      break;

    case Person:
      textParam = "A";
      break;

    case Keyword:
      textParam = "S";
      break;

    case ISBN:
      {
        TQString firstIsbn = value_;
        firstIsbn.remove('-');
        // limit to first isbn
        firstIsbn = firstIsbn.section(';', 0, 0);
        u.setFileName(TQString::fromLatin1("serdsp.asp"));
        u.addQueryItem(TQString::fromLatin1("isbn"), firstIsbn);
      }
      break;

    default:
      kdWarning() << "IBSFetcher::search() - key not recognized: " << key_ << endl;
      stop();
      return;
  }

  if(textParam) {
    u.addQueryItem(TQString::fromLatin1("Type"), TQString::fromLatin1("keyword"));
    u.addQueryItem(TQString::fromLatin1(textParam), value_);
  }
#endif
//  myDebug() << "IBSFetcher::search() - url: " << u.url() << endl;

  m_job = KIO::get(u, false, false);
  connect(m_job, TQT_SIGNAL(data(KIO::Job*, const TQByteArray&)),
          TQT_SLOT(slotData(KIO::Job*, const TQByteArray&)));

  // an ISBN lookup is parsed differently from the other search kinds,
  // so it gets its own completion slot
  const char* resultSlot = (key_ == ISBN) ? TQT_SLOT(slotCompleteISBN(KIO::Job*))
                                          : TQT_SLOT(slotComplete(KIO::Job*));
  connect(m_job, TQT_SIGNAL(result(KIO::Job*)), resultSlot);
}
|
|
|
|
void IBSFetcher::stop() {
|
|
if(!m_started) {
|
|
return;
|
|
}
|
|
|
|
if(m_job) {
|
|
m_job->kill();
|
|
m_job = 0;
|
|
}
|
|
m_data.truncate(0);
|
|
m_started = false;
|
|
emit signalDone(this);
|
|
}
|
|
|
|
/**
 * Collects one chunk of downloaded bytes by appending it to m_data.
 *
 * @param data_ the bytes delivered by the transfer job
 */
void IBSFetcher::slotData(KIO::Job*, const TQByteArray& data_) {
  // opened in append mode so that earlier chunks are kept
  TQDataStream appender(m_data, IO_WriteOnly | IO_Append);
  appender.writeRawBytes(data_.data(), data_.size());
}
|
|
|
|
/**
 * Completion handler for title, person and keyword searches.
 *
 * Scans the downloaded result page for anchors that point below
 * http://www.internetbookshop.it/code/ and whose text is wrapped in a bold
 * tag. Each such anchor is one hit: the link target becomes the detail URL
 * stored in m_matches, the bold text becomes the title, and the text between
 * the next two line-break tags (with markup stripped) becomes the
 * description. A hit is announced through signalResultFound() once the
 * following anchor has been found; the last hit is announced after the loop.
 * The search is always finished with stop().
 *
 * @param job_ the finished transfer job, used only for error reporting
 */
void IBSFetcher::slotComplete(KIO::Job* job_) {
  // since the fetch is done, don't worry about holding the job pointer
  m_job = 0;

  if(job_->error()) {
    job_->showErrorDialog(Kernel::self()->widget());
    stop();
    return;
  }

  if(m_data.isEmpty()) {
    myDebug() << "IBSFetcher::slotComplete() - no data" << endl;
    stop();
    return;
  }

  TQString s = Tellico::decodeHTML(TQString(m_data));
  // really specific regexp
  TQString pat = TQString::fromLatin1("http://www.internetbookshop.it/code/");
  TQRegExp anchorRx(TQString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*[\"'](") +
                    TQRegExp::escape(pat) +
                    TQString::fromLatin1("[^\"]*)\"[^>]*><b>([^<]+)<"), false);
  anchorRx.setMinimal(true);
  TQRegExp tagRx(TQString::fromLatin1("<.*>"));
  tagRx.setMinimal(true);

  // HTML-escaped ampersand that may still be present in a captured link.
  // The entity is deliberately written as two adjacent literals so that it
  // survives if this source file is ever passed through HTML unescaping,
  // which would otherwise turn the replacement below into a no-op.
  const TQString escapedAmp = TQString::fromLatin1("&" "amp;");
  const TQString lineBreak = TQString::fromLatin1("<br>");

  TQString u, t, d;
  for(int pos = anchorRx.search(s); m_started && pos > -1; pos = anchorRx.search(s, pos+anchorRx.matchedLength())) {
    // a new anchor marks the end of the previous hit, so publish that one now
    if(!u.isEmpty()) {
      SearchResult* r = new SearchResult(this, t, d, TQString());
      emit signalResultFound(r);

#ifdef IBS_TEST
      KURL url = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/ibs2.html"));
#else
      // the url probably contains escaped ampersands, so be careful
      KURL url = u.replace(escapedAmp, TQChar('&'));
#endif
      m_matches.insert(r->uid, url);

      u.truncate(0);
      t.truncate(0);
      d.truncate(0);
    }
    u = anchorRx.cap(1);
    t = anchorRx.cap(2);
    // the description is whatever sits between the next two line breaks
    const int pos2 = s.find(lineBreak, pos, false);
    if(pos2 > -1) {
      const int pos3 = s.find(lineBreak, pos2+1, false);
      if(pos3 > -1) {
        d = s.mid(pos2, pos3-pos2).remove(tagRx).simplifyWhiteSpace();
      }
    }
  }
#ifndef IBS_TEST
  // the last hit has no following anchor to trigger it, publish it here
  if(!u.isEmpty()) {
    SearchResult* r = new SearchResult(this, t, d, TQString());
    emit signalResultFound(r);
    m_matches.insert(r->uid, u.replace(escapedAmp, TQChar('&')));
  }
#endif

  stop();
}
|
|
|
|
/**
 * Completion handler for ISBN searches.
 *
 * The downloaded page is parsed as a single entry with parseEntry(), unless
 * it contains the "Libro non presente" marker. A parsed entry is announced
 * through signalResultFound() and the URL of the finished job is remembered
 * for it in m_matches. The search is always finished with stop().
 *
 * @param job_ the finished transfer job
 */
void IBSFetcher::slotCompleteISBN(KIO::Job* job_) {
  // the transfer has finished, so the job pointer no longer needs to be kept
  m_job = 0;

  if(job_->error()) {
    job_->showErrorDialog(Kernel::self()->widget());
    stop();
    return;
  }

  if(m_data.isEmpty()) {
    myDebug() << "IBSFetcher::slotCompleteISBN() - no data" << endl;
    stop();
    return;
  }

  const TQString page = Tellico::decodeHTML(TQString(m_data));
  const bool notListed =
      page.find(TQString::fromLatin1("Libro non presente"), 0, false /* case-insensitive */) > -1;
  if(notListed) {
    stop();
    return;
  }

  Data::EntryPtr entry = parseEntry(page);
  if(!entry) {
    stop();
    return;
  }

  // the description shown next to the title is "author/publisher"
  TQString desc = entry->field(TQString::fromLatin1("author"))
                + '/' + entry->field(TQString::fromLatin1("publisher"));
  SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(TQString::fromLatin1("isbn")));
  emit signalResultFound(r);
  m_matches.insert(r->uid, static_cast<KIO::TransferJob*>(job_)->url().url());

  stop();
}
|
|
|
|
/**
 * Returns the full entry that belongs to a previously announced search result.
 *
 * An entry that was already built for this uid is returned from m_entries.
 * Otherwise the detail page stored for the uid in m_matches is downloaded,
 * parsed with parseEntry() and remembered in m_entries.
 *
 * @param uid_ the uid of the search result
 * @return the entry, or a null pointer when no URL is known for the uid, the
 *         page text is empty, or parsing fails
 */
Tellico::Data::EntryPtr IBSFetcher::fetchEntry(uint uid_) {
  // if we already grabbed this one, then just pull it out of the map;
  // contains() is checked first because operator[] would insert an empty
  // placeholder for every uid that is not known yet
  if(m_entries.contains(uid_)) {
    Data::EntryPtr cached = m_entries[uid_];
    if(cached) {
      return cached;
    }
  }

  // same precaution for the url map: never create a slot just by asking
  KURL url;
  if(m_matches.contains(uid_)) {
    url = m_matches[uid_];
  }
  if(url.isEmpty()) {
    kdWarning() << "IBSFetcher::fetchEntry() - no url in map" << endl;
    return 0;
  }

  TQString results = Tellico::decodeHTML(FileHandler::readTextFile(url, true));
  if(results.isEmpty()) {
    myDebug() << "IBSFetcher::fetchEntry() - no text results" << endl;
    return 0;
  }

//  myDebug() << url.url() << endl;
#if 0
  // disabled debugging aid: dumps the downloaded page to a scratch file
  kdWarning() << "Remove debug from ibsfetcher.cpp" << endl;
  TQFile f(TQString::fromLatin1("/tmp/test.html"));
  if(f.open(IO_WriteOnly)) {
    TQTextStream t(&f);
    t.setEncoding(TQTextStream::UnicodeUTF8);
    t << results;
  }
  f.close();
#endif

  Data::EntryPtr entry = parseEntry(results);
  if(!entry) {
    myDebug() << "IBSFetcher::fetchEntry() - error in processing entry" << endl;
    return 0;
  }
  m_entries.insert(uid_, entry); // keep for later
  return entry;
}
|
|
|
|
/**
 * Builds a book entry from the HTML text of an IBS detail page.
 *
 * The page is scraped with regular expressions:
 *  - the ISBN comes from an "isbn=" parameter with a 13 character value,
 *  - each Italian caption in the table below is followed by markup and then
 *    the text that becomes the value of the mapped field,
 *  - a page count ("N p.") found inside the "Dati" value goes to the pages
 *    field and is cut out of the edition value,
 *  - when an ISBN was found, a cover image is requested for it,
 *  - a "Descrizione" or "In sintesi" section becomes a plot field that is
 *    added to the collection on the fly,
 *  - every author name without a comma has its first word moved to the end.
 *
 * @param str_ the decoded HTML of the detail page
 * @return a new entry inside a fresh book collection; fields whose caption
 *         was not found are simply left unset
 */
Tellico::Data::EntryPtr IBSFetcher::parseEntry(const TQString& str_) {
//  myDebug() << "IBSFetcher::parseEntry()" << endl;

  // pull the ISBN out of a link embedded in the page
  TQString isbn;
  TQRegExp isbnRx(TQString::fromLatin1("isbn=([\\dxX]{13})"), false);
  if(isbnRx.search(str_) > -1) {
    isbn = isbnRx.cap(1);
  }

  Data::CollPtr coll = new Data::BookCollection(true);
  Data::EntryPtr entry = new Data::Entry(coll);

  // map captions in HTML to field names
  static const char* const captions[][2] = {
    { "Titolo",     "title"     },
    { "Autore",     "author"    },
    { "Anno",       "pub_year"  },
    { "Categoria",  "genre"     },
    { "Rilegatura", "binding"   },
    { "Editore",    "publisher" },
    { "Dati",       "edition"   }
  };
  TQMap<TQString, TQString> fieldMap;
  for(uint i = 0; i < sizeof(captions)/sizeof(captions[0]); ++i) {
    fieldMap.insert(TQString::fromLatin1(captions[i][0]), TQString::fromLatin1(captions[i][1]));
  }

  // caption, then one or more tags, then the value text up to the next tag
  const TQString captionPattern = TQString::fromLatin1("%1(?:<[^>]+>)+([^<>\\s][^<>]+)");
  TQRegExp pagesRx(TQString::fromLatin1("(\\d+) p\\.(\\s*,\\s*)?"));

  for(TQMap<TQString, TQString>::Iterator it = fieldMap.begin(); it != fieldMap.end(); ++it) {
    TQRegExp infoRx(captionPattern.arg(it.key()));
    if(infoRx.search(str_) < 0) {
      continue; // caption not on this page
    }
    TQString value = infoRx.cap(1);
    // the edition text may carry the page count, which has a field of its own
    if(it.data() == Latin1Literal("edition") && pagesRx.search(value) > -1) {
      entry->setField(TQString::fromLatin1("pages"), pagesRx.cap(1));
      value.remove(pagesRx);
    }
    entry->setField(it.data(), value);
  }

  // image
  if(!isbn.isEmpty()) {
    entry->setField(TQString::fromLatin1("isbn"), isbn);
#if 1
    // build the cover address directly from the ISBN
    const TQString coverURL = TQString::fromLatin1("http://giotto.ibs.it/cop/copt13.asp?f=%1").arg(isbn);
    myLog() << "IBSFetcher() - cover = " << coverURL << endl;
    const TQString coverId = ImageFactory::addImage(coverURL, true, TQString::fromLatin1("http://internetbookshop.it"));
    if(!coverId.isEmpty()) {
      entry->setField(TQString::fromLatin1("cover"), coverId);
    }
#else
    // alternative kept from the original: look the cover up in the page markup
    TQRegExp imgRx(TQString::fromLatin1("<img\\s+[^>]*\\s*src\\s*=\\s*\"(http://[^/]*\\.ibs\\.it/[^\"]+e=%1)").arg(isbn));
    imgRx.setMinimal(true);
    if(imgRx.search(str_) > -1) {
      myLog() << "IBSFetcher() - cover = " << imgRx.cap(1) << endl;
      const TQString coverId = ImageFactory::addImage(imgRx.cap(1), true, TQString::fromLatin1("http://internetbookshop.it"));
      if(!coverId.isEmpty()) {
        entry->setField(TQString::fromLatin1("cover"), coverId);
      }
    }
#endif
  }

  // now look for description, falling back to the "In sintesi" section
  TQRegExp descRx(TQString::fromLatin1("Descrizione(?:<[^>]+>)+([^<>\\s].+)</span>"), false);
  descRx.setMinimal(true);
  int descPos = descRx.search(str_);
  if(descPos == -1) {
    descRx.setPattern(TQString::fromLatin1("In sintesi(?:<[^>]+>)+([^<>\\s].+)</span>"));
    descPos = descRx.search(str_);
  }
  if(descPos > -1) {
    // the collection gets a plot field only when there is something to put in it
    Data::FieldPtr plotField = new Data::Field(TQString::fromLatin1("plot"), i18n("Plot Summary"), Data::Field::Para);
    coll->addField(plotField);
    entry->setField(plotField, descRx.cap(1).simplifyWhiteSpace());
  }

  // the original author's note says IBS lists the name parts in swapped
  // order, so rotate the first word of each name to the end
  TQStringList names = entry->fields(TQString::fromLatin1("author"), false);
  if(names.isEmpty() || names[0].isEmpty()) {
    return entry;
  }
  for(TQStringList::Iterator nameIt = names.begin(); nameIt != names.end(); ++nameIt) {
    if((*nameIt).find(',') >= 0) {
      continue; // skip if it has a comma
    }
    TQStringList words = TQStringList::split(' ', *nameIt);
    if(words.isEmpty()) {
      continue;
    }
    // put first word in back
    const TQString leading = words.first();
    words.pop_front();
    words.append(leading);
    *nameIt = words.join(TQChar(' '));
  }
  entry->setField(TQString::fromLatin1("author"), names.join(TQString::fromLatin1("; ")));
  return entry;
}
|
|
|
|
/**
 * Starts a search for fresh data about an existing entry: by ISBN when the
 * entry has one, otherwise by title. When the entry has neither,
 * signalDone() is emitted immediately.
 *
 * @param entry_ the entry to look up
 */
void IBSFetcher::updateEntry(Data::EntryPtr entry_) {
  const TQString isbn = entry_->field(TQString::fromLatin1("isbn"));
  if(!isbn.isEmpty()) {
    search(Fetch::ISBN, isbn);
    return;
  }

  const TQString title = entry_->field(TQString::fromLatin1("title"));
  if(title.isEmpty()) {
    myDebug() << "IBSFetcher::updateEntry() - insufficient info to search" << endl;
    emit signalDone(this); // always need to emit this if not continuing with the search
    return;
  }

  search(Fetch::Title, title);
}
|
|
|
|
/**
 * Creates the configuration widget for this source.
 *
 * @param parent_ parent widget of the new configuration widget
 * @return a newly allocated IBSFetcher::ConfigWidget
 */
Tellico::Fetch::ConfigWidget* IBSFetcher::configWidget(TQWidget* parent_) const {
  IBSFetcher::ConfigWidget* widget = new IBSFetcher::ConfigWidget(parent_);
  return widget;
}
|
|
|
|
/**
 * Builds the configuration page, which only shows a note that there is
 * nothing to configure, followed by a stretch.
 *
 * @param parent_ parent widget, forwarded to the base class
 */
IBSFetcher::ConfigWidget::ConfigWidget(TQWidget* parent_)
    : Fetch::ConfigWidget(parent_) {
  TQWidget* options = optionsWidget();
  TQVBoxLayout* layout = new TQVBoxLayout(options);
  TQLabel* notice = new TQLabel(i18n("This source has no options."), options);
  layout->addWidget(notice);
  layout->addStretch();
}
|
|
|
|
/**
 * @return the default name of the IBS source
 */
TQString IBSFetcher::ConfigWidget::preferredName() const {
  const TQString name = IBSFetcher::defaultName();
  return name;
}
|
|
|
|
#include "ibsfetcher.moc"
|