|
|
|
/***************************************************************************
|
|
|
|
copyright : (C) 2004-2006 by Robby Stephenson
|
|
|
|
email : robby@periapsis.org
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
/***************************************************************************
|
|
|
|
* *
|
|
|
|
* This program is free software; you can redistribute it and/or modify *
|
|
|
|
* it under the terms of version 2 of the GNU General Public License as *
|
|
|
|
* published by the Free Software Foundation; *
|
|
|
|
* *
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
#include "imdbfetcher.h"
|
|
|
|
#include "../tellico_kernel.h"
|
|
|
|
#include "../collections/videocollection.h"
|
|
|
|
#include "../entry.h"
|
|
|
|
#include "../field.h"
|
|
|
|
#include "../filehandler.h"
|
|
|
|
#include "../latin1literal.h"
|
|
|
|
#include "../imagefactory.h"
|
|
|
|
#include "../tellico_utils.h"
|
|
|
|
#include "../gui/listboxtext.h"
|
|
|
|
#include "../tellico_debug.h"
|
|
|
|
|
|
|
|
#include <klocale.h>
|
|
|
|
#include <kdialogbase.h>
|
|
|
|
#include <kconfig.h>
|
|
|
|
#include <klineedit.h>
|
|
|
|
#include <knuminput.h>
|
|
|
|
|
|
|
|
#include <tqregexp.h>
|
|
|
|
#include <tqfile.h>
|
|
|
|
#include <tqmap.h>
|
|
|
|
#include <tqvbox.h>
|
|
|
|
#include <tqlabel.h>
|
|
|
|
#include <tqlistbox.h>
|
|
|
|
#include <tqwhatsthis.h>
|
|
|
|
#include <tqlayout.h>
|
|
|
|
#include <tqcheckbox.h>
|
|
|
|
#include <tqvgroupbox.h>
|
|
|
|
|
|
|
|
//#define IMDB_TEST
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
static const char* IMDB_SERVER = "akas.imdb.com";
|
|
|
|
static const uint IMDB_MAX_RESULTS = 20;
|
|
|
|
static const TQString sep = TQString::fromLatin1("; ");
|
|
|
|
}
|
|
|
|
|
|
|
|
using Tellico::Fetch::IMDBFetcher;
|
|
|
|
|
|
|
|
// Regular expressions shared by every IMDBFetcher instance. They start out
// null and are allocated by initRegExps().
TQRegExp* IMDBFetcher::s_tagRx         = 0; // any markup tag
TQRegExp* IMDBFetcher::s_anchorRx      = 0; // any anchor: cap(1) is the href, cap(2) the text
TQRegExp* IMDBFetcher::s_anchorTitleRx = 0; // anchor whose href contains "/title/"
TQRegExp* IMDBFetcher::s_anchorNameRx  = 0; // anchor whose href contains "/name/"
TQRegExp* IMDBFetcher::s_titleRx       = 0; // contents of the <title> element
|
|
|
|
|
|
|
|
// static
|
|
|
|
void IMDBFetcher::initRegExps() {
|
|
|
|
s_tagRx = new TQRegExp(TQString::fromLatin1("<.*>"));
|
|
|
|
s_tagRx->setMinimal(true);
|
|
|
|
|
|
|
|
s_anchorRx = new TQRegExp(TQString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"[^<]*>([^<]*)</a>"), false);
|
|
|
|
s_anchorRx->setMinimal(true);
|
|
|
|
|
|
|
|
s_anchorTitleRx = new TQRegExp(TQString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*\"([^\"]*/title/[^\"]*)\"[^<]*>([^<]*)</a>"), false);
|
|
|
|
s_anchorTitleRx->setMinimal(true);
|
|
|
|
|
|
|
|
s_anchorNameRx = new TQRegExp(TQString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*\"([^\"]*/name/[^\"]*)\"[^<]*>([^<]*)</a>"), false);
|
|
|
|
s_anchorNameRx->setMinimal(true);
|
|
|
|
|
|
|
|
s_titleRx = new TQRegExp(TQString::fromLatin1("<title>(.*)</title>"), false);
|
|
|
|
s_titleRx->setMinimal(true);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Creates a fetcher with the default host and result limit, and allocates
// the shared regular expressions if no earlier instance has done so.
IMDBFetcher::IMDBFetcher(TQObject* parent_, const char* name_)
    : Fetcher(parent_, name_),
      m_job(0),
      m_started(false),
      m_fetchImages(true),
      m_host(TQString::fromLatin1(IMDB_SERVER)),
      m_limit(IMDB_MAX_RESULTS),
      m_countOffset(0) {
  // a null s_tagRx is used as the sign that none of the shared
  // expressions have been created yet
  if(s_tagRx == 0) {
    initRegExps();
  }
}
|
|
|
|
|
|
|
|
// Empty: the destructor body performs no explicit cleanup.
IMDBFetcher::~IMDBFetcher() {}
|
|
|
|
|
|
|
|
// Returns the translated default display name of this data source.
TQString IMDBFetcher::defaultName() {
  const TQString name = i18n("Internet Movie Database");
  return name;
}
|
|
|
|
|
|
|
|
// Returns the name of this source: m_name when it is set, otherwise the
// default name.
TQString IMDBFetcher::source() const {
  if(m_name.isEmpty()) {
    return defaultName();
  }
  return m_name;
}
|
|
|
|
|
|
|
|
bool IMDBFetcher::canFetch(int type) const {
|
|
|
|
return type == Data::Collection::Video;
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::readConfigHook(const KConfigGroup& config_) {
|
|
|
|
TQString h = config_.readEntry("Host");
|
|
|
|
if(!h.isEmpty()) {
|
|
|
|
m_host = h;
|
|
|
|
}
|
|
|
|
m_numCast = config_.readNumEntry("Max Cast", 10);
|
|
|
|
m_fetchImages = config_.readBoolEntry("Fetch Images", true);
|
|
|
|
m_fields = config_.readListEntry("Custom Fields");
|
|
|
|
}
|
|
|
|
|
|
|
|
// multiple values not supported
|
|
|
|
void IMDBFetcher::search(FetchKey key_, const TQString& value_) {
|
|
|
|
m_key = key_;
|
|
|
|
m_value = value_;
|
|
|
|
m_started = true;
|
|
|
|
m_redirected = false;
|
|
|
|
m_data.truncate(0);
|
|
|
|
m_matches.clear();
|
|
|
|
m_popularTitles.truncate(0);
|
|
|
|
m_exactTitles.truncate(0);
|
|
|
|
m_partialTitles.truncate(0);
|
|
|
|
m_currentTitleBlock = Unknown;
|
|
|
|
m_countOffset = 0;
|
|
|
|
|
|
|
|
// only search if current collection is a video collection
|
|
|
|
if(Kernel::self()->collectionType() != Data::Collection::Video) {
|
|
|
|
myDebug() << "IMDBFetcher::search() - collection type mismatch, stopping" << endl;
|
|
|
|
stop();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef IMDB_TEST
|
|
|
|
if(m_key == Title) {
|
|
|
|
m_url = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/imdb-title.html"));
|
|
|
|
m_redirected = false;
|
|
|
|
} else {
|
|
|
|
m_url = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/imdb-name.html"));
|
|
|
|
m_redirected = true;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
m_url = KURL();
|
|
|
|
m_url.setProtocol(TQString::fromLatin1("http"));
|
|
|
|
m_url.setHost(m_host.isEmpty() ? TQString::fromLatin1(IMDB_SERVER) : m_host);
|
|
|
|
m_url.setPath(TQString::fromLatin1("/find"));
|
|
|
|
|
|
|
|
switch(key_) {
|
|
|
|
case Title:
|
|
|
|
m_url.addQueryItem(TQString::fromLatin1("s"), TQString::fromLatin1("tt"));
|
|
|
|
break;
|
|
|
|
|
|
|
|
case Person:
|
|
|
|
m_url.addQueryItem(TQString::fromLatin1("s"), TQString::fromLatin1("nm"));
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
kdWarning() << "IMDBFetcher::search() - FetchKey not supported" << endl;
|
|
|
|
stop();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// as far as I can tell, the url encoding should always be iso-8859-1
|
|
|
|
// not utf-8
|
|
|
|
m_url.addQueryItem(TQString::fromLatin1("q"), value_, 4 /* iso-8859-1 */);
|
|
|
|
|
|
|
|
// myDebug() << "IMDBFetcher::search() url = " << m_url << endl;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
m_job = KIO::get(m_url, false, false);
|
|
|
|
connect(m_job, TQT_SIGNAL(data(KIO::Job*, const TQByteArray&)),
|
|
|
|
TQT_SLOT(slotData(KIO::Job*, const TQByteArray&)));
|
|
|
|
connect(m_job, TQT_SIGNAL(result(KIO::Job*)),
|
|
|
|
TQT_SLOT(slotComplete(KIO::Job*)));
|
|
|
|
connect(m_job, TQT_SIGNAL(redirection(KIO::Job *, const KURL&)),
|
|
|
|
TQT_SLOT(slotRedirection(KIO::Job*, const KURL&)));
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::continueSearch() {
|
|
|
|
m_started = true;
|
|
|
|
m_limit += IMDB_MAX_RESULTS;
|
|
|
|
|
|
|
|
if(m_currentTitleBlock == Popular) {
|
|
|
|
parseTitleBlock(m_popularTitles);
|
|
|
|
// if the offset is 0, then we need to be looking at the next block
|
|
|
|
m_currentTitleBlock = m_countOffset == 0 ? Exact : Popular;
|
|
|
|
}
|
|
|
|
|
|
|
|
// current title block might have changed
|
|
|
|
if(m_currentTitleBlock == Exact) {
|
|
|
|
parseTitleBlock(m_exactTitles);
|
|
|
|
m_currentTitleBlock = m_countOffset == 0 ? Partial : Exact;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(m_currentTitleBlock == Partial) {
|
|
|
|
parseTitleBlock(m_partialTitles);
|
|
|
|
m_currentTitleBlock = m_countOffset == 0 ? Unknown : Partial;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(m_currentTitleBlock == SinglePerson) {
|
|
|
|
parseSingleNameResult();
|
|
|
|
}
|
|
|
|
|
|
|
|
stop();
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::stop() {
|
|
|
|
if(!m_started) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// myLog() << "IMDBFetcher::stop()" << endl;
|
|
|
|
if(m_job) {
|
|
|
|
m_job->kill();
|
|
|
|
m_job = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
m_started = false;
|
|
|
|
m_redirected = false;
|
|
|
|
|
|
|
|
emit signalDone(this);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Appends a chunk of downloaded bytes to m_data.
void IMDBFetcher::slotData(KIO::Job*, const TQByteArray& data_) {
  TQDataStream appender(m_data, IO_WriteOnly | IO_Append);
  appender.writeRawBytes(data_.data(), data_.size());
}
|
|
|
|
|
|
|
|
// Records that the job was redirected and remembers the new URL.
void IMDBFetcher::slotRedirection(KIO::Job*, const KURL& toURL_) {
  m_redirected = true;
  m_url = toURL_;
}
|
|
|
|
|
|
|
|
// Called when the download job finishes. Shows an error dialog and stops on
// a job error, stops when no data arrived, and otherwise hands the data to
// the parser matching the search key and the redirection state.
void IMDBFetcher::slotComplete(KIO::Job* job_) {
  // since the fetch is done, don't worry about holding the job pointer
  m_job = 0;

  if(job_->error()) {
    job_->showErrorDialog(Kernel::self()->widget());
    stop();
    return;
  }

  if(m_data.isEmpty()) {
    stop();
    return;
  }

  // a single result was found if we got redirected
  const bool singleResult = m_redirected;

  if(m_key == Title) {
    if(singleResult) {
      parseSingleTitleResult();
    } else {
      parseMultipleTitleResults();
    }
    return;
  }

  if(singleResult) {
    parseSingleNameResult();
  } else {
    parseMultipleNameResults();
  }
}
|
|
|
|
|
|
|
|
void IMDBFetcher::parseSingleTitleResult() {
|
|
|
|
// myDebug() << "IMDBFetcher::parseSingleTitleResult()" << endl;
|
|
|
|
s_titleRx->search(Tellico::decodeHTML(TQString(m_data)));
|
|
|
|
// split title at parenthesis
|
|
|
|
const TQString cap1 = s_titleRx->cap(1);
|
|
|
|
int pPos = cap1.find('(');
|
|
|
|
// FIXME: maybe remove parentheses here?
|
|
|
|
SearchResult* r = new SearchResult(this,
|
|
|
|
pPos == -1 ? cap1 : cap1.left(pPos),
|
|
|
|
pPos == -1 ? TQString() : cap1.mid(pPos),
|
|
|
|
TQString());
|
|
|
|
m_matches.insert(r->uid, m_url);
|
|
|
|
emit signalResultFound(r);
|
|
|
|
|
|
|
|
m_hasMoreResults = false;
|
|
|
|
stop();
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::parseMultipleTitleResults() {
|
|
|
|
// myDebug() << "IMDBFetcher::parseMultipleTitleResults()" << endl;
|
|
|
|
TQString output = Tellico::decodeHTML(TQString(m_data));
|
|
|
|
|
|
|
|
// IMDb can return three title lists, popular, exact, and partial
|
|
|
|
// the popular titles are in the first table, after the "Popular Results" text
|
|
|
|
int pos_popular = output.find(TQString::fromLatin1("Popular Titles"), 0, false);
|
|
|
|
int pos_exact = output.find(TQString::fromLatin1("Exact Matches"), TQMAX(pos_popular, 0), false);
|
|
|
|
int pos_partial = output.find(TQString::fromLatin1("Partial Matches"), TQMAX(pos_exact, 0), false);
|
|
|
|
int end_popular = pos_exact; // keep track of where to end
|
|
|
|
if(end_popular == -1) {
|
|
|
|
end_popular = pos_partial == -1 ? output.length() : pos_partial;
|
|
|
|
}
|
|
|
|
int end_exact = pos_partial; // keep track of where to end
|
|
|
|
if(end_exact == -1) {
|
|
|
|
end_exact = output.length();
|
|
|
|
}
|
|
|
|
|
|
|
|
// if found popular matches
|
|
|
|
if(pos_popular > -1) {
|
|
|
|
m_popularTitles = output.mid(pos_popular, end_popular-pos_popular);
|
|
|
|
}
|
|
|
|
// if found exact matches
|
|
|
|
if(pos_exact > -1) {
|
|
|
|
m_exactTitles = output.mid(pos_exact, end_exact-pos_exact);
|
|
|
|
}
|
|
|
|
if(pos_partial > -1) {
|
|
|
|
m_partialTitles = output.mid(pos_partial);
|
|
|
|
}
|
|
|
|
|
|
|
|
parseTitleBlock(m_popularTitles);
|
|
|
|
// if the offset is 0, then we need to be looking at the next block
|
|
|
|
m_currentTitleBlock = m_countOffset == 0 ? Exact : Popular;
|
|
|
|
|
|
|
|
if(m_matches.size() < m_limit) {
|
|
|
|
parseTitleBlock(m_exactTitles);
|
|
|
|
m_currentTitleBlock = m_countOffset == 0 ? Partial : Exact;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(m_matches.size() < m_limit) {
|
|
|
|
parseTitleBlock(m_partialTitles);
|
|
|
|
m_currentTitleBlock = m_countOffset == 0 ? Unknown : Partial;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
if(m_matches.size() == 0) {
|
|
|
|
myDebug() << "IMDBFetcher::parseMultipleTitleResults() - no matches found." << endl;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
stop();
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::parseTitleBlock(const TQString& str_) {
|
|
|
|
if(str_.isEmpty()) {
|
|
|
|
m_countOffset = 0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// myDebug() << "IMDBFetcher::parseTitleBlock() - " << m_currentTitleBlock << endl;
|
|
|
|
|
|
|
|
TQRegExp akaRx(TQString::fromLatin1("aka (.*)(</li>|<br)"), false);
|
|
|
|
akaRx.setMinimal(true);
|
|
|
|
|
|
|
|
m_hasMoreResults = false;
|
|
|
|
|
|
|
|
int count = 0;
|
|
|
|
int start = s_anchorTitleRx->search(str_);
|
|
|
|
while(m_started && start > -1) {
|
|
|
|
// split title at parenthesis
|
|
|
|
const TQString cap1 = s_anchorTitleRx->cap(1); // the anchor url
|
|
|
|
const TQString cap2 = s_anchorTitleRx->cap(2).stripWhiteSpace(); // the anchor text
|
|
|
|
start += s_anchorTitleRx->matchedLength();
|
|
|
|
int pPos = cap2.find('('); // if it has parentheses, use that for description
|
|
|
|
TQString desc;
|
|
|
|
if(pPos > -1) {
|
|
|
|
int pPos2 = cap2.find(')', pPos+1);
|
|
|
|
if(pPos2 > -1) {
|
|
|
|
desc = cap2.mid(pPos+1, pPos2-pPos-1);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// parenthesis might be outside anchor tag
|
|
|
|
int end = s_anchorTitleRx->search(str_, start);
|
|
|
|
if(end == -1) {
|
|
|
|
end = str_.length();
|
|
|
|
}
|
|
|
|
TQString text = str_.mid(start, end-start);
|
|
|
|
pPos = text.find('(');
|
|
|
|
if(pPos > -1) {
|
|
|
|
int pNewLine = text.find(TQString::fromLatin1("<br"));
|
|
|
|
if(pNewLine == -1 || pPos < pNewLine) {
|
|
|
|
int pPos2 = text.find(')', pPos);
|
|
|
|
desc = text.mid(pPos+1, pPos2-pPos-1);
|
|
|
|
}
|
|
|
|
pPos = -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// multiple matches might have 'aka' info
|
|
|
|
int end = s_anchorTitleRx->search(str_, start+1);
|
|
|
|
if(end == -1) {
|
|
|
|
end = str_.length();
|
|
|
|
}
|
|
|
|
int akaPos = akaRx.search(str_, start+1);
|
|
|
|
if(akaPos > -1 && akaPos < end) {
|
|
|
|
// limit to 50 chars
|
|
|
|
desc += TQChar(' ') + akaRx.cap(1).stripWhiteSpace().remove(*s_tagRx);
|
|
|
|
if(desc.length() > 50) {
|
|
|
|
desc = desc.left(50) + TQString::fromLatin1("...");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
start = s_anchorTitleRx->search(str_, start);
|
|
|
|
|
|
|
|
if(count < m_countOffset) {
|
|
|
|
++count;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// if we got this far, then there is a valid result
|
|
|
|
if(m_matches.size() >= m_limit) {
|
|
|
|
m_hasMoreResults = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
SearchResult* r = new SearchResult(this, pPos == -1 ? cap2 : cap2.left(pPos), desc, TQString());
|
|
|
|
KURL u(m_url, cap1);
|
|
|
|
u.setQuery(TQString());
|
|
|
|
m_matches.insert(r->uid, u);
|
|
|
|
emit signalResultFound(r);
|
|
|
|
++count;
|
|
|
|
}
|
|
|
|
if(!m_hasMoreResults && m_currentTitleBlock != Partial) {
|
|
|
|
m_hasMoreResults = true;
|
|
|
|
}
|
|
|
|
m_countOffset = m_matches.size() < m_limit ? 0 : count;
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::parseSingleNameResult() {
|
|
|
|
// myDebug() << "IMDBFetcher::parseSingleNameResult()" << endl;
|
|
|
|
|
|
|
|
m_currentTitleBlock = SinglePerson;
|
|
|
|
|
|
|
|
TQString output = Tellico::decodeHTML(TQString(m_data));
|
|
|
|
|
|
|
|
int pos = s_anchorTitleRx->search(output);
|
|
|
|
if(pos == -1) {
|
|
|
|
stop();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
TQRegExp tvRegExp(TQString::fromLatin1("TV\\sEpisode"), false);
|
|
|
|
|
|
|
|
int len = 0;
|
|
|
|
int count = 0;
|
|
|
|
TQString desc;
|
|
|
|
for( ; m_started && pos > -1; pos = s_anchorTitleRx->search(output, pos+len)) {
|
|
|
|
desc.truncate(0);
|
|
|
|
bool isEpisode = false;
|
|
|
|
len = s_anchorTitleRx->cap(0).length();
|
|
|
|
// split title at parenthesis
|
|
|
|
const TQString cap2 = s_anchorTitleRx->cap(2).stripWhiteSpace();
|
|
|
|
int pPos = cap2.find('(');
|
|
|
|
if(pPos > -1) {
|
|
|
|
desc = cap2.mid(pPos);
|
|
|
|
} else {
|
|
|
|
// look until the next <a
|
|
|
|
int aPos = output.find(TQString::fromLatin1("<a"), pos+len, false);
|
|
|
|
if(aPos == -1) {
|
|
|
|
aPos = output.length();
|
|
|
|
}
|
|
|
|
TQString tmp = output.mid(pos+len, aPos-pos-len);
|
|
|
|
if(tmp.find(tvRegExp) > -1) {
|
|
|
|
isEpisode = true;
|
|
|
|
}
|
|
|
|
pPos = tmp.find('(');
|
|
|
|
if(pPos > -1) {
|
|
|
|
int pNewLine = tmp.find(TQString::fromLatin1("<br"));
|
|
|
|
if(pNewLine == -1 || pPos < pNewLine) {
|
|
|
|
int pEnd = tmp.find(')', pPos+1);
|
|
|
|
desc = tmp.mid(pPos+1, pEnd-pPos-1).remove(*s_tagRx);
|
|
|
|
}
|
|
|
|
// but need to indicate it wasn't found initially
|
|
|
|
pPos = -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
;
|
|
|
|
|
|
|
|
if(count < m_countOffset) {
|
|
|
|
++count;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
++count;
|
|
|
|
if(isEpisode) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// if we got this far, then there is a valid result
|
|
|
|
if(m_matches.size() >= m_limit) {
|
|
|
|
m_hasMoreResults = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: maybe remove parentheses here?
|
|
|
|
SearchResult* r = new SearchResult(this, pPos == -1 ? cap2 : cap2.left(pPos), desc, TQString());
|
|
|
|
KURL u(m_url, s_anchorTitleRx->cap(1)); // relative URL constructor
|
|
|
|
u.setQuery(TQString());
|
|
|
|
m_matches.insert(r->uid, u);
|
|
|
|
// myDebug() << u.prettyURL() << endl;
|
|
|
|
// myDebug() << cap2 << endl;
|
|
|
|
emit signalResultFound(r);
|
|
|
|
}
|
|
|
|
if(pos == -1) {
|
|
|
|
m_hasMoreResults = false;
|
|
|
|
}
|
|
|
|
m_countOffset = count - 1;
|
|
|
|
|
|
|
|
stop();
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::parseMultipleNameResults() {
|
|
|
|
// myDebug() << "IMDBFetcher::parseMultipleNameResults()" << endl;
|
|
|
|
|
|
|
|
// the exact results are in the first table after the "exact results" text
|
|
|
|
TQString output = Tellico::decodeHTML(TQString(m_data));
|
|
|
|
int pos = output.find(TQString::fromLatin1("Popular Results"), 0, false);
|
|
|
|
if(pos == -1) {
|
|
|
|
pos = output.find(TQString::fromLatin1("Exact Matches"), 0, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
// find beginning of partial matches
|
|
|
|
int end = output.find(TQString::fromLatin1("Other Results"), TQMAX(pos, 0), false);
|
|
|
|
if(end == -1) {
|
|
|
|
end = output.find(TQString::fromLatin1("Partial Matches"), TQMAX(pos, 0), false);
|
|
|
|
if(end == -1) {
|
|
|
|
end = output.find(TQString::fromLatin1("Approx Matches"), TQMAX(pos, 0), false);
|
|
|
|
if(end == -1) {
|
|
|
|
end = output.length();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
TQMap<TQString, KURL> map;
|
|
|
|
TQMap<TQString, int> nameMap;
|
|
|
|
|
|
|
|
TQString s;
|
|
|
|
// if found exact matches
|
|
|
|
if(pos > -1) {
|
|
|
|
pos = s_anchorNameRx->search(output, pos+13);
|
|
|
|
while(pos > -1 && pos < end && m_matches.size() < m_limit) {
|
|
|
|
KURL u(m_url, s_anchorNameRx->cap(1));
|
|
|
|
s = s_anchorNameRx->cap(2).stripWhiteSpace() + ' ';
|
|
|
|
// if more than one exact, add parentheses
|
|
|
|
if(nameMap.contains(s) && nameMap[s] > 0) {
|
|
|
|
// fix the first one that didn't have a number
|
|
|
|
if(nameMap[s] == 1) {
|
|
|
|
KURL u2 = map[s];
|
|
|
|
map.remove(s);
|
|
|
|
map.insert(s + "(1) ", u2);
|
|
|
|
}
|
|
|
|
nameMap.insert(s, nameMap[s] + 1);
|
|
|
|
// check for duplicate names
|
|
|
|
s += TQString::fromLatin1("(%1) ").tqarg(nameMap[s]);
|
|
|
|
} else {
|
|
|
|
nameMap.insert(s, 1);
|
|
|
|
}
|
|
|
|
map.insert(s, u);
|
|
|
|
pos = s_anchorNameRx->search(output, pos+s_anchorNameRx->cap(0).length());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// go ahead and search for partial matches
|
|
|
|
pos = s_anchorNameRx->search(output, end);
|
|
|
|
while(pos > -1 && m_matches.size() < m_limit) {
|
|
|
|
KURL u(m_url, s_anchorNameRx->cap(1)); // relative URL
|
|
|
|
s = s_anchorNameRx->cap(2).stripWhiteSpace();
|
|
|
|
if(nameMap.contains(s) && nameMap[s] > 0) {
|
|
|
|
// fix the first one that didn't have a number
|
|
|
|
if(nameMap[s] == 1) {
|
|
|
|
KURL u2 = map[s];
|
|
|
|
map.remove(s);
|
|
|
|
map.insert(s + " (1)", u2);
|
|
|
|
}
|
|
|
|
nameMap.insert(s, nameMap[s] + 1);
|
|
|
|
// check for duplicate names
|
|
|
|
s += TQString::fromLatin1(" (%1)").tqarg(nameMap[s]);
|
|
|
|
} else {
|
|
|
|
nameMap.insert(s, 1);
|
|
|
|
}
|
|
|
|
map.insert(s, u);
|
|
|
|
pos = s_anchorNameRx->search(output, pos+s_anchorNameRx->cap(0).length());
|
|
|
|
}
|
|
|
|
|
|
|
|
if(map.count() == 0) {
|
|
|
|
stop();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
KDialogBase* dlg = new KDialogBase(Kernel::self()->widget(), "imdb dialog",
|
|
|
|
true, i18n("Select IMDB Result"), KDialogBase::Ok|KDialogBase::Cancel);
|
|
|
|
TQVBox* box = new TQVBox(dlg);
|
|
|
|
box->setSpacing(10);
|
|
|
|
(void) new TQLabel(i18n("<qt>Your search returned multiple matches. Please select one below.</qt>"), box);
|
|
|
|
|
|
|
|
TQListBox* listBox = new TQListBox(box);
|
|
|
|
listBox->setMinimumWidth(400);
|
|
|
|
listBox->setColumnMode(TQListBox::FitToWidth);
|
|
|
|
const TQStringList values = map.keys();
|
|
|
|
for(TQStringList::ConstIterator it = values.begin(); it != values.end(); ++it) {
|
|
|
|
if((*it).endsWith(TQChar(' '))) {
|
|
|
|
GUI::ListBoxText* box = new GUI::ListBoxText(listBox, *it, 0);
|
|
|
|
box->setColored(true);
|
|
|
|
} else {
|
|
|
|
(void) new GUI::ListBoxText(listBox, *it);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
listBox->setSelected(0, true);
|
|
|
|
TQWhatsThis::add(listBox, i18n("<qt>Select a search result.</qt>"));
|
|
|
|
|
|
|
|
dlg->setMainWidget(box);
|
|
|
|
if(dlg->exec() != TQDialog::Accepted || listBox->currentText().isEmpty()) {
|
|
|
|
dlg->delayedDestruct();
|
|
|
|
stop();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
m_url = map[listBox->currentText()];
|
|
|
|
dlg->delayedDestruct();
|
|
|
|
|
|
|
|
// redirected is true since that's how I tell if an exact match has been found
|
|
|
|
m_redirected = true;
|
|
|
|
m_data.truncate(0);
|
|
|
|
m_job = KIO::get(m_url, false, false);
|
|
|
|
connect(m_job, TQT_SIGNAL(data(KIO::Job*, const TQByteArray&)),
|
|
|
|
TQT_SLOT(slotData(KIO::Job*, const TQByteArray&)));
|
|
|
|
connect(m_job, TQT_SIGNAL(result(KIO::Job*)),
|
|
|
|
TQT_SLOT(slotComplete(KIO::Job*)));
|
|
|
|
connect(m_job, TQT_SIGNAL(redirection(KIO::Job *, const KURL&)),
|
|
|
|
TQT_SLOT(slotRedirection(KIO::Job*, const KURL&)));
|
|
|
|
|
|
|
|
// do not stop() here
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the entry for the search result uid_, building it on first use.
// The page text comes from the data already downloaded when the result's URL
// is the current one; otherwise the page is read synchronously. m_url is
// switched to the result's URL while the page is parsed and restored
// afterwards. Returns 0 when no URL is known for uid_, the page is empty, or
// parsing yields no entry.
Tellico::Data::EntryPtr IMDBFetcher::fetchEntry(uint uid_) {
  // if we already grabbed this one, then just pull it out of the dict
  Data::EntryPtr cached = m_entries[uid_];
  if(cached) {
    return cached;
  }

  KURL url = m_matches[uid_];
  if(url.isEmpty()) {
    myDebug() << "IMDBFetcher::fetchEntry() - no url found" << endl;
    return 0;
  }

  KURL savedURL = m_url; // keep to switch back
  TQString pageText;
  if(url == m_url) {
    // the url matches the current one, no need to redownload it
//    myDebug() << "IMDBFetcher::fetchEntry() - matches previous URL, no downloading needed." << endl;
    pageText = Tellico::decodeHTML(TQString(m_data));
  } else {
    // now it's synchronous
#ifdef IMDB_TEST
    KURL u = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/imdb-title-result.html"));
    pageText = Tellico::decodeHTML(FileHandler::readTextFile(u));
#else
    // be quiet about failure
    pageText = Tellico::decodeHTML(FileHandler::readTextFile(url, true));
    m_url = url; // needed for processing
#endif
  }

  if(pageText.isEmpty()) {
    myDebug() << "IMDBFetcher::fetchEntry() - no text results" << endl;
    m_url = savedURL;
    return 0;
  }

  Data::EntryPtr entry = parseEntry(pageText);
  m_url = savedURL;
  if(!entry) {
    myDebug() << "IMDBFetcher::fetchEntry() - error in processing entry" << endl;
    return 0;
  }

  m_entries.insert(uid_, entry); // keep for later
  return entry;
}
|
|
|
|
|
|
|
|
// Builds a new entry in a fresh video collection from the HTML of a title
// page. Each do*() helper fills the fields it is responsible for; the cover
// is only fetched when image fetching is enabled. When the "imdb" custom
// field is requested, or the collection already has it as a URL field, the
// current URL without its query is stored there.
Tellico::Data::EntryPtr IMDBFetcher::parseEntry(const TQString& str_) {
  Data::CollPtr coll = new Data::VideoCollection(true);
  Data::EntryPtr entry = new Data::Entry(coll);

  doTitle(str_, entry);
  doRunningTime(str_, entry);
  doAspectRatio(str_, entry);
  doAlsoKnownAs(str_, entry);
  doPlot(str_, entry, m_url);
  doLists(str_, entry);
  doPerson(str_, entry, TQString::fromLatin1("Director"), TQString::fromLatin1("director"));
  doPerson(str_, entry, TQString::fromLatin1("Writer"), TQString::fromLatin1("writer"));
  doRating(str_, entry);
  doCast(str_, entry, m_url);
  if(m_fetchImages) {
    // needs base URL
    doCover(str_, entry, m_url);
  }

  const TQString linkField = TQString::fromLatin1("imdb");

  // add the link field only if it was asked for and is not there yet
  const bool linkRequested = m_fields.findIndex(linkField) > -1;
  if(!coll->hasField(linkField) && linkRequested) {
    Data::FieldPtr field = new Data::Field(linkField, i18n("IMDB Link"), Data::Field::URL);
    field->setCategory(i18n("General"));
    coll->addField(field);
  }

  if(coll->hasField(linkField) && coll->fieldByName(linkField)->type() == Data::Field::URL) {
    m_url.setQuery(TQString());
    entry->setField(linkField, m_url.url());
  }
  return entry;
}
|
|
|
|
|
|
|
|
void IMDBFetcher::doTitle(const TQString& str_, Data::EntryPtr entry_) {
|
|
|
|
if(s_titleRx->search(str_) > -1) {
|
|
|
|
const TQString cap1 = s_titleRx->cap(1);
|
|
|
|
// titles always have parentheses
|
|
|
|
int pPos = cap1.find('(');
|
|
|
|
TQString title = cap1.left(pPos).stripWhiteSpace();
|
|
|
|
// remove first and last quotes is there
|
|
|
|
if(title.startsWith(TQChar('"')) && title.endsWith(TQChar('"'))) {
|
|
|
|
title = title.mid(1, title.length()-2);
|
|
|
|
}
|
|
|
|
entry_->setField(TQString::fromLatin1("title"), title);
|
|
|
|
// remove parenthesis
|
|
|
|
uint pPos2 = pPos+1;
|
|
|
|
while(pPos2 < cap1.length() && cap1[pPos2].isDigit()) {
|
|
|
|
++pPos2;
|
|
|
|
}
|
|
|
|
TQString year = cap1.mid(pPos+1, pPos2-pPos-1);
|
|
|
|
if(!year.isEmpty()) {
|
|
|
|
entry_->setField(TQString::fromLatin1("year"), year);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::doRunningTime(const TQString& str_, Data::EntryPtr entry_) {
|
|
|
|
// running time
|
|
|
|
TQRegExp runtimeRx(TQString::fromLatin1("runtime:.*(\\d+)\\s+min"), false);
|
|
|
|
runtimeRx.setMinimal(true);
|
|
|
|
|
|
|
|
if(runtimeRx.search(str_) > -1) {
|
|
|
|
// myDebug() << "running-time = " << runtimeRx.cap(1) << endl;
|
|
|
|
entry_->setField(TQString::fromLatin1("running-time"), runtimeRx.cap(1));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::doAspectRatio(const TQString& str_, Data::EntryPtr entry_) {
|
|
|
|
TQRegExp rx(TQString::fromLatin1("aspect ratio:.*([\\d\\.]+\\s*:\\s*[\\d\\.]+)"), false);
|
|
|
|
rx.setMinimal(true);
|
|
|
|
|
|
|
|
if(rx.search(str_) > -1) {
|
|
|
|
// myDebug() << "aspect ratio = " << rx.cap(1) << endl;
|
|
|
|
entry_->setField(TQString::fromLatin1("aspect-ratio"), rx.cap(1).stripWhiteSpace());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::doAlsoKnownAs(const TQString& str_, Data::EntryPtr entry_) {
|
|
|
|
if(m_fields.findIndex(TQString::fromLatin1("alttitle")) == -1) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// match until next b tag
|
|
|
|
// TQRegExp akaRx(TQString::fromLatin1("also known as(.*)<b(?:\\s.*)?>"));
|
|
|
|
TQRegExp akaRx(TQString::fromLatin1("also known as(.*)<(b[>\\s/]|div)"), false);
|
|
|
|
akaRx.setMinimal(true);
|
|
|
|
|
|
|
|
if(akaRx.search(str_) > -1 && !akaRx.cap(1).isEmpty()) {
|
|
|
|
Data::FieldPtr f = entry_->collection()->fieldByName(TQString::fromLatin1("alttitle"));
|
|
|
|
if(!f) {
|
|
|
|
f = new Data::Field(TQString::fromLatin1("alttitle"), i18n("Alternative Titles"), Data::Field::Table);
|
|
|
|
f->setFormatFlag(Data::Field::FormatTitle);
|
|
|
|
entry_->collection()->addField(f);
|
|
|
|
}
|
|
|
|
|
|
|
|
// split by <br>, remembering it could become valid xhtml!
|
|
|
|
TQRegExp brRx(TQString::fromLatin1("<br[\\s/]*>"), false);
|
|
|
|
brRx.setMinimal(true);
|
|
|
|
TQStringList list = TQStringList::split(brRx, akaRx.cap(1));
|
|
|
|
// lang could be included with [fr]
|
|
|
|
// const TQRegExp parRx(TQString::fromLatin1("\\(.+\\)"));
|
|
|
|
const TQRegExp brackRx(TQString::fromLatin1("\\[\\w+\\]"));
|
|
|
|
TQStringList values;
|
|
|
|
for(TQStringList::Iterator it = list.begin(); it != list.end(); ++it) {
|
|
|
|
TQString s = *it;
|
|
|
|
// sometimes, the word "more" gets linked to the releaseinfo page, check that
|
|
|
|
if(s.find(TQString::fromLatin1("releaseinfo")) > -1) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
s.remove(*s_tagRx);
|
|
|
|
s.remove(brackRx);
|
|
|
|
s = s.stripWhiteSpace();
|
|
|
|
// the first value ends up being or starting with the colon after "Also know as"
|
|
|
|
// I'm too lazy to figure out a better regexp
|
|
|
|
if(s.startsWith(TQChar(':'))) {
|
|
|
|
s = s.mid(1);
|
|
|
|
}
|
|
|
|
if(!s.isEmpty()) {
|
|
|
|
values += s;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(!values.isEmpty()) {
|
|
|
|
entry_->setField(TQString::fromLatin1("alttitle"), values.join(sep));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::doPlot(const TQString& str_, Data::EntryPtr entry_, const KURL& baseURL_) {
|
|
|
|
// plot summaries provided by users are on a separate page
|
|
|
|
// should those be preferred?
|
|
|
|
|
|
|
|
bool useUserSummary = false;
|
|
|
|
|
|
|
|
TQString thisPlot;
|
|
|
|
// match until next opening tag
|
|
|
|
TQRegExp plotRx(TQString::fromLatin1("plot (?:outline|summary):(.*)<[^/].*</"), false);
|
|
|
|
plotRx.setMinimal(true);
|
|
|
|
TQRegExp plotURLRx(TQString::fromLatin1("<a\\s+.*href\\s*=\\s*\".*/title/.*/plotsummary\""), false);
|
|
|
|
plotURLRx.setMinimal(true);
|
|
|
|
if(plotRx.search(str_) > -1) {
|
|
|
|
thisPlot = plotRx.cap(1);
|
|
|
|
thisPlot.remove(*s_tagRx); // remove HTML tags
|
|
|
|
entry_->setField(TQString::fromLatin1("plot"), thisPlot);
|
|
|
|
// if thisPlot ends with (more) or contains
|
|
|
|
// a url that ends with plotsummary, then we'll grab it, otherwise not
|
|
|
|
if(plotRx.cap(0).endsWith(TQString::fromLatin1("(more)</")) || plotURLRx.search(plotRx.cap(0)) > -1) {
|
|
|
|
useUserSummary = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if(useUserSummary) {
|
|
|
|
TQRegExp idRx(TQString::fromLatin1("title/(tt\\d+)"));
|
|
|
|
idRx.search(baseURL_.path());
|
|
|
|
KURL plotURL = baseURL_;
|
|
|
|
plotURL.setPath(TQString::fromLatin1("/title/") + idRx.cap(1) + TQString::fromLatin1("/plotsummary"));
|
|
|
|
// be quiet about failure
|
|
|
|
TQString plotPage = FileHandler::readTextFile(plotURL, true);
|
|
|
|
|
|
|
|
if(!plotPage.isEmpty()) {
|
|
|
|
TQRegExp plotRx(TQString::fromLatin1("<p\\s+class\\s*=\\s*\"plotpar\">(.*)</p"));
|
|
|
|
plotRx.setMinimal(true);
|
|
|
|
if(plotRx.search(plotPage) > -1) {
|
|
|
|
TQString userPlot = plotRx.cap(1);
|
|
|
|
userPlot.remove(*s_tagRx); // remove HTML tags
|
|
|
|
entry_->setField(TQString::fromLatin1("plot"), Tellico::decodeHTML(userPlot));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::doPerson(const TQString& str_, Data::EntryPtr entry_,
|
|
|
|
const TQString& imdbHeader_, const TQString& fieldName_) {
|
|
|
|
TQRegExp br2Rx(TQString::fromLatin1("<br[\\s/]*>\\s*<br[\\s/]*>"), false);
|
|
|
|
br2Rx.setMinimal(true);
|
|
|
|
TQRegExp divRx(TQString::fromLatin1("<[/]*div"), false);
|
|
|
|
divRx.setMinimal(true);
|
|
|
|
TQString name = TQString::fromLatin1("/name/");
|
|
|
|
|
|
|
|
StringSet people;
|
|
|
|
for(int pos = str_.find(imdbHeader_); pos > 0; pos = str_.find(imdbHeader_, pos)) {
|
|
|
|
// loop until repeated <br> tags or </div> tag
|
|
|
|
const int endPos1 = str_.find(br2Rx, pos);
|
|
|
|
const int endPos2 = str_.find(divRx, pos);
|
|
|
|
const int endPos = TQMIN(endPos1, endPos2); // ok to be -1
|
|
|
|
pos = s_anchorRx->search(str_, pos+1);
|
|
|
|
while(pos > -1 && pos < endPos) {
|
|
|
|
if(s_anchorRx->cap(1).find(name) > -1) {
|
|
|
|
people.add(s_anchorRx->cap(2).stripWhiteSpace());
|
|
|
|
}
|
|
|
|
pos = s_anchorRx->search(str_, pos+1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(!people.isEmpty()) {
|
|
|
|
entry_->setField(fieldName_, people.toList().join(sep));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::doCast(const TQString& str_, Data::EntryPtr entry_, const KURL& baseURL_) {
|
|
|
|
// the extended cast list is on a separate page
|
|
|
|
// that's usually a lot of people
|
|
|
|
// but since it can be in billing order, the main actors might not
|
|
|
|
// be in the short list
|
|
|
|
TQRegExp idRx(TQString::fromLatin1("title/(tt\\d+)"));
|
|
|
|
idRx.search(baseURL_.path());
|
|
|
|
#ifdef IMDB_TEST
|
|
|
|
KURL castURL = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/imdb-title-fullcredits.html"));
|
|
|
|
#else
|
|
|
|
KURL castURL = baseURL_;
|
|
|
|
castURL.setPath(TQString::fromLatin1("/title/") + idRx.cap(1) + TQString::fromLatin1("/fullcredits"));
|
|
|
|
#endif
|
|
|
|
// be quiet about failure and be sure to translate entities
|
|
|
|
TQString castPage = Tellico::decodeHTML(FileHandler::readTextFile(castURL, true));
|
|
|
|
|
|
|
|
int pos = -1;
|
|
|
|
// the text to search, depends on which page is being read
|
|
|
|
TQString castText = castPage;
|
|
|
|
if(castText.isEmpty()) {
|
|
|
|
// fall back to short list
|
|
|
|
castText = str_;
|
|
|
|
pos = castText.find(TQString::fromLatin1("cast overview"), 0, false);
|
|
|
|
if(pos == -1) {
|
|
|
|
pos = castText.find(TQString::fromLatin1("credited cast"), 0, false);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// first look for anchor
|
|
|
|
TQRegExp castAnchorRx(TQString::fromLatin1("<a\\s+name\\s*=\\s*\"cast\""), false);
|
|
|
|
pos = castText.find(castAnchorRx);
|
|
|
|
if(pos < 0) {
|
|
|
|
TQRegExp tableClassRx(TQString::fromLatin1("<table\\s+class\\s*=\\s*\"cast\""), false);
|
|
|
|
pos = castText.find(tableClassRx);
|
|
|
|
if(pos < 0) {
|
|
|
|
// fragile, the word "cast" appears in the title, but need to find
|
|
|
|
// the one right above the actual cast table
|
|
|
|
// for TV shows, there's a link on the sidebar for "episodes case"
|
|
|
|
// so need to not match that one
|
|
|
|
pos = castText.find(TQString::fromLatin1("cast</"), 0, false);
|
|
|
|
if(pos > 9) {
|
|
|
|
// back up 9 places
|
|
|
|
if(castText.mid(pos-9, 9).startsWith(TQString::fromLatin1("episodes"))) {
|
|
|
|
// find next cast list
|
|
|
|
pos = castText.find(TQString::fromLatin1("cast</"), pos+6, false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(pos == -1) { // no cast list found
|
|
|
|
myDebug() << "IMDBFetcher::doCast() - no cast list found" << endl;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
const TQString name = TQString::fromLatin1("/name/");
|
|
|
|
TQRegExp tdRx(TQString::fromLatin1("<td[^>]*>(.*)</td>"), false);
|
|
|
|
tdRx.setMinimal(true);
|
|
|
|
|
|
|
|
TQStringList cast;
|
|
|
|
// loop until closing table tag
|
|
|
|
const int endPos = castText.find(TQString::fromLatin1("</table"), pos, false);
|
|
|
|
pos = s_anchorRx->search(castText, pos+1);
|
|
|
|
while(pos > -1 && pos < endPos && static_cast<int>(cast.count()) < m_numCast) {
|
|
|
|
if(s_anchorRx->cap(1).find(name) > -1) {
|
|
|
|
// now search for <td> item with character name
|
|
|
|
// there's a column with ellipses then the character
|
|
|
|
const int pos2 = tdRx.search(castText, pos);
|
|
|
|
if(pos2 > -1 && tdRx.search(castText, pos2+1) > -1) {
|
|
|
|
cast += s_anchorRx->cap(2).stripWhiteSpace()
|
|
|
|
+ TQString::fromLatin1("::") + tdRx.cap(1).simplifyWhiteSpace().remove(*s_tagRx);
|
|
|
|
} else {
|
|
|
|
cast += s_anchorRx->cap(2).stripWhiteSpace();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pos = s_anchorRx->search(castText, pos+1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if(!cast.isEmpty()) {
|
|
|
|
entry_->setField(TQString::fromLatin1("cast"), cast.join(sep));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::doRating(const TQString& str_, Data::EntryPtr entry_) {
|
|
|
|
if(m_fields.findIndex(TQString::fromLatin1("imdb-rating")) == -1) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// don't add a colon, since there's a <br> at the end
|
|
|
|
// some of the imdb images use /10.gif in their path, so check for space or bracket
|
|
|
|
TQRegExp rx(TQString::fromLatin1("[>\\s](\\d+.?\\d*)/10[<//s]"), false);
|
|
|
|
rx.setMinimal(true);
|
|
|
|
|
|
|
|
if(rx.search(str_) > -1 && !rx.cap(1).isEmpty()) {
|
|
|
|
Data::FieldPtr f = entry_->collection()->fieldByName(TQString::fromLatin1("imdb-rating"));
|
|
|
|
if(!f) {
|
|
|
|
f = new Data::Field(TQString::fromLatin1("imdb-rating"), i18n("IMDB Rating"), Data::Field::Rating);
|
|
|
|
f->setCategory(i18n("General"));
|
|
|
|
f->setProperty(TQString::fromLatin1("maximum"), TQString::fromLatin1("10"));
|
|
|
|
entry_->collection()->addField(f);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool ok;
|
|
|
|
float value = rx.cap(1).toFloat(&ok);
|
|
|
|
if(ok) {
|
|
|
|
entry_->setField(TQString::fromLatin1("imdb-rating"), TQString::number(value));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void IMDBFetcher::doCover(const TQString& str_, Data::EntryPtr entry_, const KURL& baseURL_) {
|
|
|
|
// cover is the img with the "cover" alt text
|
|
|
|
TQRegExp imgRx(TQString::fromLatin1("<img\\s+[^>]*src\\s*=\\s*\"([^\"]*)\"[^>]*>"), false);
|
|
|
|
imgRx.setMinimal(true);
|
|
|
|
|
|
|
|
TQRegExp posterRx(TQString::fromLatin1("<a\\s+[^>]*name\\s*=\\s*\"poster\"[^>]*>(.*)</a>"), false);
|
|
|
|
posterRx.setMinimal(true);
|
|
|
|
|
|
|
|
const TQString cover = TQString::fromLatin1("cover");
|
|
|
|
|
|
|
|
int pos = posterRx.search(str_);
|
|
|
|
while(pos > -1) {
|
|
|
|
if(imgRx.search(posterRx.cap(1)) > -1) {
|
|
|
|
KURL u(baseURL_, imgRx.cap(1));
|
|
|
|
TQString id = ImageFactory::addImage(u, true);
|
|
|
|
if(!id.isEmpty()) {
|
|
|
|
entry_->setField(cover, id);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
pos = posterRx.search(str_, pos+1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// didn't find the cover, IMDb also used to put "cover" inside the url
|
|
|
|
pos = imgRx.search(str_);
|
|
|
|
while(pos > -1) {
|
|
|
|
if(imgRx.cap(0).find(cover, 0, false) > -1) {
|
|
|
|
KURL u(baseURL_, imgRx.cap(1));
|
|
|
|
TQString id = ImageFactory::addImage(u, true);
|
|
|
|
if(!id.isEmpty()) {
|
|
|
|
entry_->setField(cover, id);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
pos = imgRx.search(str_, pos+1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// end up reparsing whole string, but it's not really that slow
|
|
|
|
// look at every anchor tag in the string
|
|
|
|
void IMDBFetcher::doLists(const TQString& str_, Data::EntryPtr entry_) {
|
|
|
|
const TQString genre = TQString::fromLatin1("/Genres/");
|
|
|
|
const TQString country = TQString::fromLatin1("/Countries/");
|
|
|
|
const TQString lang = TQString::fromLatin1("/Languages/");
|
|
|
|
const TQString colorInfo = TQString::fromLatin1("color-info");
|
|
|
|
const TQString cert = TQString::fromLatin1("certificates=");
|
|
|
|
const TQString soundMix = TQString::fromLatin1("sound-mix=");
|
|
|
|
const TQString year = TQString::fromLatin1("/Years/");
|
|
|
|
const TQString company = TQString::fromLatin1("/company/");
|
|
|
|
|
|
|
|
// IIMdb also has links with the word "sections" in them, remove that
|
|
|
|
// for genres and nationalities
|
|
|
|
|
|
|
|
TQStringList genres, countries, langs, certs, tracks, studios;
|
|
|
|
for(int pos = s_anchorRx->search(str_); pos > -1; pos = s_anchorRx->search(str_, pos+1)) {
|
|
|
|
const TQString cap1 = s_anchorRx->cap(1);
|
|
|
|
if(cap1.find(genre) > -1) {
|
|
|
|
if(s_anchorRx->cap(2).find(TQString::fromLatin1(" section"), 0, false) == -1) {
|
|
|
|
genres += s_anchorRx->cap(2).stripWhiteSpace();
|
|
|
|
}
|
|
|
|
} else if(cap1.find(country) > -1) {
|
|
|
|
if(s_anchorRx->cap(2).find(TQString::fromLatin1(" section"), 0, false) == -1) {
|
|
|
|
countries += s_anchorRx->cap(2).stripWhiteSpace();
|
|
|
|
}
|
|
|
|
} else if(cap1.find(lang) > -1) {
|
|
|
|
langs += s_anchorRx->cap(2).stripWhiteSpace();
|
|
|
|
} else if(cap1.find(colorInfo) > -1) {
|
|
|
|
// change "black and white" to "black & white"
|
|
|
|
entry_->setField(TQString::fromLatin1("color"),
|
|
|
|
s_anchorRx->cap(2).replace(TQString::fromLatin1("and"), TQChar('&')).stripWhiteSpace());
|
|
|
|
} else if(cap1.find(cert) > -1) {
|
|
|
|
certs += s_anchorRx->cap(2).stripWhiteSpace();
|
|
|
|
} else if(cap1.find(soundMix) > -1) {
|
|
|
|
tracks += s_anchorRx->cap(2).stripWhiteSpace();
|
|
|
|
} else if(cap1.find(company) > -1) {
|
|
|
|
studios += s_anchorRx->cap(2).stripWhiteSpace();
|
|
|
|
// if year field wasn't set before, do it now
|
|
|
|
} else if(entry_->field(TQString::fromLatin1("year")).isEmpty() && cap1.find(year) > -1) {
|
|
|
|
entry_->setField(TQString::fromLatin1("year"), s_anchorRx->cap(2).stripWhiteSpace());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
entry_->setField(TQString::fromLatin1("genre"), genres.join(sep));
|
|
|
|
entry_->setField(TQString::fromLatin1("nationality"), countries.join(sep));
|
|
|
|
entry_->setField(TQString::fromLatin1("language"), langs.join(sep));
|
|
|
|
entry_->setField(TQString::fromLatin1("audio-track"), tracks.join(sep));
|
|
|
|
entry_->setField(TQString::fromLatin1("studio"), studios.join(sep));
|
|
|
|
if(!certs.isEmpty()) {
|
|
|
|
// first try to set default certification
|
|
|
|
const TQStringList& certsAllowed = entry_->collection()->fieldByName(TQString::fromLatin1("certification"))->allowed();
|
|
|
|
for(TQStringList::ConstIterator it = certs.begin(); it != certs.end(); ++it) {
|
|
|
|
TQString country = (*it).section(':', 0, 0);
|
|
|
|
TQString cert = (*it).section(':', 1, 1);
|
|
|
|
if(cert == Latin1Literal("Unrated")) {
|
|
|
|
cert = TQChar('U');
|
|
|
|
}
|
|
|
|
cert += TQString::fromLatin1(" (") + country + ')';
|
|
|
|
if(certsAllowed.findIndex(cert) > -1) {
|
|
|
|
entry_->setField(TQString::fromLatin1("certification"), cert);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// now add new field for all certifications
|
|
|
|
const TQString allc = TQString::fromLatin1("allcertification");
|
|
|
|
if(m_fields.findIndex(allc) > -1) {
|
|
|
|
Data::FieldPtr f = entry_->collection()->fieldByName(allc);
|
|
|
|
if(!f) {
|
|
|
|
f = new Data::Field(allc, i18n("Certifications"), Data::Field::Table);
|
|
|
|
f->setFlags(Data::Field::AllowGrouped);
|
|
|
|
entry_->collection()->addField(f);
|
|
|
|
}
|
|
|
|
entry_->setField(TQString::fromLatin1("allcertification"), certs.join(sep));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Start an update for an existing entry.
//
// If the entry has a non-empty, valid "imdb" link, that page is fetched
// directly (after switching it to the configured host if it differs).
// Otherwise, if the entry has a title, a title search is started.
// If neither is available, signalDone is emitted right away.
void IMDBFetcher::updateEntry(Data::EntryPtr entry_) {
//  myLog() << "IMDBFetcher::updateEntry() - " << entry_->title() << endl;
  // only take first 5
  m_limit = 5;

  const TQString title = entry_->field(TQString::fromLatin1("title"));
  KURL link = entry_->field(TQString::fromLatin1("imdb"));

  const bool haveLink = !link.isEmpty() && link.isValid();
  if(!haveLink) {
    if(title.isEmpty()) {
      // always need to emit this if not continuing with the search
      emit signalDone(this);
      return;
    }
    // optimistically try searching for title and rely on
    // Collection::sameEntry() to figure things out
    search(Fetch::Title, title);
    return;
  }

  // check if we want a different host
  if(link.host() != m_host) {
//    myLog() << "IMDBFetcher::updateEntry() - switching hosts to " << m_host << endl;
    link.setHost(m_host);
  }

  // reset the fetcher state for a direct page request
  m_key = Fetch::Title;
  m_value = title;
  m_started = true;
  m_data.truncate(0);
  m_matches.clear();
  m_url = link;
  // m_redirected is used as a flag later to tell if we get a single result
  m_redirected = true;

  m_job = KIO::get(m_url, false, false);
  connect(m_job, TQT_SIGNAL(data(KIO::Job*, const TQByteArray&)),
          TQT_SLOT(slotData(KIO::Job*, const TQByteArray&)));
  connect(m_job, TQT_SIGNAL(result(KIO::Job*)),
          TQT_SLOT(slotComplete(KIO::Job*)));
  connect(m_job, TQT_SIGNAL(redirection(KIO::Job *, const KURL&)),
          TQT_SLOT(slotRedirection(KIO::Job*, const KURL&)));
}
|
|
|
|
|
|
|
|
// Create a new configuration widget for this fetcher with parent_ as its
// parent widget. The widget is heap-allocated and returned to the caller.
Tellico::Fetch::ConfigWidget* IMDBFetcher::configWidget(TQWidget* parent_) const {
  IMDBFetcher::ConfigWidget* widget = new IMDBFetcher::ConfigWidget(parent_, this);
  return widget;
}
|
|
|
|
|
|
|
|
// Build the options page: a host line edit, a spin box for the maximum number
// of cast members, a check box for downloading the cover image, and the
// widget for the additional custom fields.
// When fetcher_ is given, the controls start out with its current settings;
// otherwise the defaults are used (IMDB_SERVER, 10 cast members, images on).
IMDBFetcher::ConfigWidget::ConfigWidget(TQWidget* parent_, const IMDBFetcher* fetcher_/*=0*/)
    : Fetch::ConfigWidget(parent_) {
  TQGridLayout* grid = new TQGridLayout(optionsWidget(), 4, 2);
  grid->setSpacing(4);
  grid->setColStretch(1, 10);

  int row = 0;

  // row 0: host
  TQLabel* hostLabel = new TQLabel(i18n("Hos&t: "), optionsWidget());
  grid->addWidget(hostLabel, row, 0);
  m_hostEdit = new KLineEdit(optionsWidget());
  connect(m_hostEdit, TQT_SIGNAL(textChanged(const TQString&)), TQT_SLOT(slotSetModified()));
  grid->addWidget(m_hostEdit, row, 1);
  const TQString hostHelp = i18n("The Internet Movie Database uses several different servers. Choose the one "
                                 "you wish to use.");
  TQWhatsThis::add(hostLabel, hostHelp);
  TQWhatsThis::add(m_hostEdit, hostHelp);
  hostLabel->setBuddy(m_hostEdit);
  ++row;

  // row 1: maximum number of cast members
  TQLabel* castLabel = new TQLabel(i18n("&Maximum cast: "), optionsWidget());
  grid->addWidget(castLabel, row, 0);
  m_numCast = new KIntSpinBox(0, 99, 1, 10, 10, optionsWidget());
  connect(m_numCast, TQT_SIGNAL(valueChanged(const TQString&)), TQT_SLOT(slotSetModified()));
  grid->addWidget(m_numCast, row, 1);
  const TQString castHelp = i18n("The list of cast members may include many people. Set the maximum number returned from the search.");
  TQWhatsThis::add(castLabel, castHelp);
  TQWhatsThis::add(m_numCast, castHelp);
  castLabel->setBuddy(m_numCast);
  ++row;

  // row 2: cover download toggle, spanning both columns
  m_fetchImageCheck = new TQCheckBox(i18n("Download cover &image"), optionsWidget());
  connect(m_fetchImageCheck, TQT_SIGNAL(clicked()), TQT_SLOT(slotSetModified()));
  grid->addMultiCellWidget(m_fetchImageCheck, row, row, 0, 1);
  const TQString imageHelp = i18n("The cover image may be downloaded as well. However, too many large images in the "
                                  "collection may degrade performance.");
  TQWhatsThis::add(m_fetchImageCheck, imageHelp);
  ++row;

  // row 3: stretch row that takes up the remaining space
  grid->setRowStretch(row, 10);

  // now add additional fields widget
  addFieldsWidget(IMDBFetcher::customFields(), fetcher_ ? fetcher_->m_fields : TQStringList());

  if(!fetcher_) {
    // defaults
    m_hostEdit->setText(TQString::fromLatin1(IMDB_SERVER));
    m_numCast->setValue(10);
    m_fetchImageCheck->setChecked(true);
  } else {
    m_hostEdit->setText(fetcher_->m_host);
    m_numCast->setValue(fetcher_->m_numCast);
    m_fetchImageCheck->setChecked(fetcher_->m_fetchImages);
  }
}
|
|
|
|
|
|
|
|
// Write the widget's settings into config_: "Host" (only when the trimmed
// text is not empty), "Max Cast" and "Fetch Images", followed by the custom
// field selection. Finally slotSetModified(false) is called.
void IMDBFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) {
  const TQString hostName = m_hostEdit->text().stripWhiteSpace();
  const bool hasHost = !hostName.isEmpty();
  if(hasHost) {
    config_.writeEntry("Host", hostName);
  }

  config_.writeEntry("Max Cast", m_numCast->value());
  config_.writeEntry("Fetch Images", m_fetchImageCheck->isChecked());

  saveFieldsConfig(config_);
  slotSetModified(false);
}
|
|
|
|
|
|
|
|
// The preferred name of this source is the fetcher's default name.
TQString IMDBFetcher::ConfigWidget::preferredName() const {
  const TQString preferred = IMDBFetcher::defaultName();
  return preferred;
}
|
|
|
|
|
|
|
|
//static
|
|
|
|
Tellico::StringMap IMDBFetcher::customFields() {
|
|
|
|
StringMap map;
|
|
|
|
map[TQString::fromLatin1("imdb")] = i18n("IMDB Link");
|
|
|
|
map[TQString::fromLatin1("imdb-rating")] = i18n("IMDB Rating");
|
|
|
|
map[TQString::fromLatin1("alttitle")] = i18n("Alternative Titles");
|
|
|
|
map[TQString::fromLatin1("allcertification")] = i18n("Certifications");
|
|
|
|
return map;
|
|
|
|
}
|
|
|
|
|
|
|
|
#include "imdbfetcher.moc"
|