You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kbibtex/src/webquerygooglescholar.cpp

470 lines
18 KiB

/***************************************************************************
* Copyright (C) 2004-2009 by Thomas Fischer *
* fischer@unix-ag.uni-kl.de *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#include <qfile.h>
#include <qapplication.h>
#include <qregexp.h>
#include <qtimer.h>
#include <qmap.h>
#include <qspinbox.h>
#include <kconfig.h>
#include <klineedit.h>
#include <klocale.h>
#include <kdebug.h>
#include <kmessagebox.h>
#include <kio/job.h>
#include <dcopref.h>
#include <settings.h>
#include "webquerygooglescholar.h"
namespace KBibTeX
{
/**
 * Build the query widget and pre-fill the query line edit with the
 * default value that the settings object returns for "GoogleScholar".
 * @param parent parent widget, passed on to WebQueryWidget
 * @param name   object name, passed on to WebQueryWidget
 */
WebQueryGoogleScholarWidget::WebQueryGoogleScholarWidget( QWidget *parent, const char *name )
        : WebQueryWidget( parent, name )
{
    init();

    /** fetch the default query text; a null string is replaced by an empty one */
    QString storedQuery = Settings::self()->getWebQueryDefault( "GoogleScholar" );
    if ( storedQuery == QString::null )
        storedQuery = "";

    /** show the text and notify the widget about the (initial) change */
    lineEditQuery->setText( storedQuery );
    slotTextChanged( storedQuery, true );
}
/**
 * Create the Google Scholar query object together with its BibTeX
 * importer and its query widget. No transfer job or transfer buffer
 * exists yet, so both pointers start out as NULL.
 * @param parent parent widget, used for WebQuery and for the query widget
 */
WebQueryGoogleScholar::WebQueryGoogleScholar( QWidget* parent )
        : WebQuery( parent ), m_transferJob( NULL ), m_transferJobBuffer( NULL )
{
    /** importer is configured to ignore comments */
    BibTeX::FileImporterBibTeX *bibtexImporter = new BibTeX::FileImporterBibTeX( FALSE );
    bibtexImporter->setIgnoreComments( TRUE );
    m_importer = bibtexImporter;

    /** widget where the user enters the query */
    m_widget = new WebQueryGoogleScholarWidget( parent );
}
/**
 * Destroy the query object and release the widget and the importer
 * that were allocated in the constructor.
 */
WebQueryGoogleScholar::~WebQueryGoogleScholar()
{
    /** query widget first, importer second */
    delete m_widget;
    delete m_importer;
}
/**
 * @return translated title of this web query ("Google Scholar")
 */
QString WebQueryGoogleScholar::title()
{
    const QString caption = i18n( "Google Scholar" );
    return caption;
}
/**
 * @return translated label text for the disclaimer ("About Google Scholar")
 */
QString WebQueryGoogleScholar::disclaimer()
{
    const QString label = i18n( "About Google Scholar" );
    return label;
}
/**
 * @return address of Google Scholar's "about" page
 */
QString WebQueryGoogleScholar::disclaimerURL()
{
    const QString address( "http://scholar.google.com/intl/en/scholar/about.html" );
    return address;
}
/**
 * @return the query widget created in the constructor
 */
WebQueryWidget *WebQueryGoogleScholar::widget()
{
    WebQueryWidget *queryWidget = m_widget;
    return queryWidget;
}
/**
 * Start a Google Scholar search for the term entered in the query widget.
 *
 * The search is a chain of asynchronous HTTP requests; each request's
 * result slot starts the next one:
 *   query()                              -> start page
 *   slotFinishedStartpage()              -> preferences page
 *   slotFinishedLoadingSettings()        -> submit preferences (BibTeX links)
 *   slotFinishedSavingSettings()         -> actual search
 *   slotFinishedReceivingResultOverview()-> download one BibTeX file per hit
 *
 * The currently running job is always stored in the member m_transferJob
 * so that cancelQuery() is able to kill it.
 */
void WebQueryGoogleScholar::query()
{
    WebQuery::query();

    /** save search term in settings */
    Settings *settings = Settings::self();
    settings->setWebQueryDefault( "GoogleScholar", m_widget->lineEditQuery->text() );

    /** generate web-safe search term ('%' has to be replaced first) */
    m_searchTerm = m_widget->lineEditQuery->text().stripWhiteSpace().replace( '$', "" );
    m_searchTerm = m_searchTerm.replace( "%", "%25" ).replace( "+", "%2B" ).replace( " ", "%20" ).replace( "#", "%23" ).replace( "&", "%26" ).replace( "?", "%3F" );
    if ( m_searchTerm.isEmpty() )
    {
        setEndSearch( WebQuery::statusInvalidQuery );
        return;
    }

    /** initialize variables */
    m_abort = false;
    m_numberOfResults = m_widget->spinBoxMaxHits->value();
    /** one stage per expected hit plus the fixed requests of the chain */
    setNumStages( m_numberOfResults + 5 );

    /** reset KDE configuration for cookie handling */
    readAndChangeConfig();

    /** prepare HTTP request (buffer, signals, job) */
    m_transferJobBuffer = new QBuffer();
    m_transferJobBuffer->open( IO_WriteOnly );
    /** store the job in the member (not in a local variable) so that cancelQuery() can reach it */
    m_transferJob = KIO::get( KURL( "http://scholar.google.com/scholar_ncr" ), false, false );
    connect( m_transferJob, SIGNAL( data( KIO::Job *, const QByteArray & ) ), this, SLOT( slotData( KIO::Job *, const QByteArray & ) ) );
    connect( m_transferJob, SIGNAL( result( KIO::Job * ) ), this, SLOT( slotFinishedStartpage( KIO::Job * ) ) );
}

/**
 * Abort a running search. The abort flag is evaluated by the result
 * slots, which then only clean up. A running transfer job is killed
 * non-quietly, i.e. its result slot is still invoked and performs the
 * clean-up (buffer, cookie configuration).
 */
void WebQueryGoogleScholar::cancelQuery()
{
    /** user aborted search */
    m_abort = true;
    if ( m_transferJob != NULL ) m_transferJob->kill( false );
    setEndSearch( WebQuery::statusError );
}

/**
 * Result slot for the start page request; starts the request for the
 * preferences page.
 * @param job the finished transfer job
 */
void WebQueryGoogleScholar::slotFinishedStartpage( KIO::Job *job )
{
    /** a finished KIO job deletes itself, the stored pointer must not be used any more */
    m_transferJob = NULL;

    /** close and delete buffer (content does not matter) */
    m_transferJobBuffer->close();
    delete m_transferJobBuffer;
    m_transferJobBuffer = NULL;

    /** if aborted in the mean time, clean up everything */
    if ( m_abort )
    {
        restoreConfig();
        return;
    }

    /** error occurred */
    if ( job->error() != 0 )
    {
        restoreConfig();
        kdDebug() << "Error in slotFinishedStartpage: " << job->error() << endl;
        setEndSearch( statusError );
        return;
    }

    /** update progress bar */
    enterNextStage();

    /** prepare next HTTP request for preferences page (buffer, signals, job) */
    m_transferJobBuffer = new QBuffer();
    m_transferJobBuffer->open( IO_WriteOnly );
    m_transferJob = KIO::get( KURL( "http://scholar.google.com/scholar_preferences?hl=en" ), false, false );
    connect( m_transferJob, SIGNAL( data( KIO::Job *, const QByteArray & ) ), this, SLOT( slotData( KIO::Job *, const QByteArray & ) ) );
    connect( m_transferJob, SIGNAL( result( KIO::Job * ) ), this, SLOT( slotFinishedLoadingSettings( KIO::Job * ) ) );
}

/**
 * Result slot for the preferences page; takes over the form values of
 * the page, adjusts them and submits them.
 * @param job the finished transfer job
 */
void WebQueryGoogleScholar::slotFinishedLoadingSettings( KIO::Job *job )
{
    /** a finished KIO job deletes itself, the stored pointer must not be used any more */
    m_transferJob = NULL;

    /** close buffer, keep its content as text, then delete it */
    m_transferJobBuffer->close();
    QString htmlCode = textFromBuffer( m_transferJobBuffer );
    delete m_transferJobBuffer;
    m_transferJobBuffer = NULL;

    /** if aborted in the mean time, clean up everything */
    if ( m_abort )
    {
        restoreConfig();
        return;
    }

    /** error occurred */
    if ( job->error() != 0 )
    {
        restoreConfig();
        kdDebug() << "Error in slotFinishedLoadingSettings: " << job->error() << endl;
        setEndSearch( statusError );
        return;
    }

    /** update progress bar */
    enterNextStage();

    /** parse html code to get form values */
    QMap<QString, QString> keyValues = evalFormFields( htmlCode );

    /** set form values for BibTeX search */
    keyValues["scis"] = "yes";
    keyValues["scisf"] = "4";
    keyValues["submit"] = "Save+Preferences";
    keyValues["num"] = QString::number( m_numberOfResults );

    /** prepare next HTTP request to submit preferences (buffer, signals, job) */
    KURL nextUrl( formFieldsToUrl( "http://scholar.google.com/scholar_setprefs", keyValues ) );
    m_transferJobBuffer = new QBuffer();
    m_transferJobBuffer->open( IO_WriteOnly );
    m_transferJob = KIO::get( nextUrl, false, false );
    connect( m_transferJob, SIGNAL( data( KIO::Job *, const QByteArray & ) ), this, SLOT( slotData( KIO::Job *, const QByteArray & ) ) );
    connect( m_transferJob, SIGNAL( result( KIO::Job * ) ), this, SLOT( slotFinishedSavingSettings( KIO::Job * ) ) );
}

/**
 * Result slot for the submitted preferences; takes over the form values
 * of the returned page, adds the search term and starts the search.
 * @param job the finished transfer job
 */
void WebQueryGoogleScholar::slotFinishedSavingSettings( KIO::Job *job )
{
    /** a finished KIO job deletes itself, the stored pointer must not be used any more */
    m_transferJob = NULL;

    /** close buffer, keep its content as text, then delete it */
    m_transferJobBuffer->close();
    QString htmlCode = textFromBuffer( m_transferJobBuffer );
    delete m_transferJobBuffer;
    m_transferJobBuffer = NULL;

    /** if aborted in the mean time, clean up everything */
    if ( m_abort )
    {
        restoreConfig();
        return;
    }

    /** error occurred */
    if ( job->error() != 0 )
    {
        restoreConfig();
        kdDebug() << "Error in slotFinishedSavingSettings: " << job->error() << endl;
        setEndSearch( statusError );
        return;
    }

    /** update progress bar */
    enterNextStage();

    /** parse html code to get form values */
    QMap<QString, QString> keyValues = evalFormFields( htmlCode );

    /** set form values for search */
    keyValues["q"] = m_searchTerm;
    keyValues["num"] = QString::number( m_numberOfResults );

    /** prepare next HTTP request for actual search (buffer, signals, job) */
    KURL nextUrl( formFieldsToUrl( "http://scholar.google.com/scholar", keyValues ) );
    m_transferJobBuffer = new QBuffer();
    m_transferJobBuffer->open( IO_WriteOnly );
    m_transferJob = KIO::get( nextUrl, false, false );
    connect( m_transferJob, SIGNAL( data( KIO::Job *, const QByteArray & ) ), this, SLOT( slotData( KIO::Job *, const QByteArray & ) ) );
    connect( m_transferJob, SIGNAL( result( KIO::Job * ) ), this, SLOT( slotFinishedReceivingResultOverview( KIO::Job * ) ) );
}

/**
 * Result slot for the search request; downloads the BibTeX file linked
 * for each hit on the result page and emits the first entry of each
 * file via foundEntry(). Restores the cookie configuration at the end.
 * @param job the finished transfer job
 */
void WebQueryGoogleScholar::slotFinishedReceivingResultOverview( KIO::Job *job )
{
    /** a finished KIO job deletes itself, the stored pointer must not be used any more */
    m_transferJob = NULL;

    /** close buffer, keep its content as text, then delete it */
    m_transferJobBuffer->close();
    QString htmlCode = textFromBuffer( m_transferJobBuffer );
    delete m_transferJobBuffer;
    m_transferJobBuffer = NULL;

    /** if aborted in the mean time, clean up everything */
    if ( m_abort )
    {
        restoreConfig();
        return;
    }

    /** error occurred */
    if ( job->error() != 0 )
    {
        restoreConfig();
        kdDebug() << "Error in slotFinishedReceivingResultOverview: " << job->error() << endl;
        setEndSearch( statusError );
        return;
    }

    /** update progress bar */
    enterNextStage();

    /** find all links to BibTeX files in result page */
    QRegExp reBibUrl( "/scholar.bib[^ \">]+" );
    int pos = 0;
    /** m_abort is the flag set by cancelQuery(), so it has to end the loop as well */
    while ( !m_abort && !m_aborted && ( pos = htmlCode.find( reBibUrl, pos + 1 ) ) > 0 )
    {
        /** download individual BibTeX file for each search hit */
        KURL bibUrl( "http://scholar.google.com" + reBibUrl.cap( 0 ).replace( "&amp;", "&" ) );
        BibTeX::File *tmpBibFile = downloadBibTeXFile( bibUrl );

        /** update progress bar */
        enterNextStage();

        /** parse, evaluate and store first BibTeX entry */
        if ( tmpBibFile != NULL )
        {
            BibTeX::File::ElementList::iterator it = tmpBibFile->begin();
            if ( it != tmpBibFile->end() )
            {
                BibTeX::Entry *entry = dynamic_cast<BibTeX::Entry*>( *it );
                if ( entry != NULL )
                    emit foundEntry( new BibTeX::Entry( entry ), false );
            }
            delete tmpBibFile;
        }
    }

    /** restore old cookie configuration */
    restoreConfig();

    /** cancelQuery() has already reported the end of the search, do not report it twice */
    if ( m_abort )
        return;

    /** set result status */
    if ( m_aborted )
        setEndSearch( statusAborted );
    else
        setEndSearch( statusSuccess );
}
/**
 * Remember the current cookie policy from "kcookiejarrc" and then
 * rewrite it so that cookies are accepted: cookies are enabled, the
 * global advice and the session cookie setting are set to accept, and
 * every per-domain entry whose domain contains ".google." is set to
 * "Accept". Finally the cookie jar is asked via DCOP to reload its policy.
 * The remembered values are written back by restoreConfig().
 */
void WebQueryGoogleScholar::readAndChangeConfig()
{
    KConfig cfg( "kcookiejarrc" );
    cfg.setGroup( "Cookie Policy" );

    /** remember the current settings */
    m_originalEnableCookies = cfg.readBoolEntry( "Cookies", true );
    m_originalSessionCookies = cfg.readBoolEntry( "AcceptSessionCookies", true );
    QStringList cookieSettingsList = QStringList::split( ',', cfg.readEntry( "CookieDomainAdvice", "" ) );
    m_originalCookieGlobalAdvice = cfg.readEntry( "CookieGlobalAdvice", "Accept" );

    /**
     * forget per-domain entries remembered by an earlier call; otherwise
     * entries removed from the configuration in the mean time would be
     * written back by restoreConfig()
     */
    m_originalCookieMap.clear();

    /** per-domain advice is stored as comma-separated "domain:advice" pairs */
    for ( QStringList::Iterator it = cookieSettingsList.begin(); it != cookieSettingsList.end(); ++it )
    {
        QStringList keyValue = QStringList::split( ':', *it );
        if ( keyValue.size() == 2 )
        {
            m_originalCookieMap[keyValue[0]] = keyValue[1];
        }
    }

    /** write the temporary, cookie-accepting settings */
    cfg.writeEntry( "Cookies", true );
    cfg.writeEntry( "CookieGlobalAdvice", "Accept" );
    cfg.writeEntry( "AcceptSessionCookies", true );
    cookieSettingsList.clear();
    for ( QMap<QString, QString>::Iterator it = m_originalCookieMap.begin(); it != m_originalCookieMap.end(); ++it )
    {
        /** only Google domains are changed, all other entries keep their advice */
        QString value = it.key().contains( ".google." ) ? "Accept" : it.data();
        cookieSettingsList << it.key() + ":" + value;
    }
    cfg.writeEntry( "CookieDomainAdvice", cookieSettingsList.join( "," ) );
    cfg.sync();

    /** make the cookie jar pick up the changed policy */
    ( void )DCOPRef( "kded", "kcookiejar" ).send( "reloadPolicy" );
}
void WebQueryGoogleScholar::restoreConfig()
{
KConfig cfg( "kcookiejarrc" );
cfg.setGroup( "Cookie Policy" );
cfg.writeEntry( "CookieGlobalAdvice", m_originalCookieGlobalAdvice );
cfg.writeEntry( "Cookies", m_originalEnableCookies );
cfg.writeEntry( "AcceptSessionCookies", m_originalSessionCookies );
QStringList cookieSettingsList;
for ( QMap<QString, QString>::Iterator it = m_originalCookieMap.begin(); it != m_originalCookieMap.end(); ++it )
cookieSettingsList << it.key() + ":" + it.data();
cfg.writeEntry( "CookieDomainAdvice", cookieSettingsList.join( "," ) );
cfg.sync();
if ( !m_originalEnableCookies )
( void )DCOPRef( "kded", "kcookiejar" ).send( "shutdown" );
else
( void )DCOPRef( "kded", "kcookiejar" ).send( "reloadPolicy" );
}
/**
 * Read the complete content of a buffer as text.
 * The buffer is opened for reading, read line by line and closed again;
 * the lines are concatenated without any separator between them.
 * @param buffer buffer to read from
 * @return concatenated lines of the buffer
 */
QString WebQueryGoogleScholar::textFromBuffer( QBuffer *buffer )
{
    QString text = "";
    buffer->open( IO_ReadOnly );
    QTextStream stream( buffer );
    for ( ;; )
    {
        if ( stream.atEnd() )
            break;
        text += stream.readLine();
    }
    buffer->close();
    return text;
}
/**
 * Extract form values from HTML code.
 * Two kinds of elements are evaluated: <input ...> tags and
 * <select name=...>...</select> blocks with their <option ...> tags.
 * Each tag is split into tokens at the characters '<', '>', '=', '"'
 * and ' '; the token following "name" or "value" is taken as the
 * respective attribute value.
 * @param htmlCode HTML code to scan
 * @return map from field name to field value
 */
QMap <QString, QString> WebQueryGoogleScholar::evalFormFields( const QString &htmlCode )
{
QMap<QString, QString> keyValues;
QRegExp reInput( "<input[^>]+>" );
QRegExp reSplit( "[<>=\" ]+" );
int pos = 0;
/** search starts at index 1; the scan ends at the first match position that is not greater than 5 (includes "no match") */
while (( pos = htmlCode.find( reInput, pos + 1 ) ) > 5 )
{
QStringList elements = QStringList::split( reSplit, reInput.cap( 0 ) );
bool checked = false;
bool isCheckable = false;
bool isSubmit = false;
QString key = QString::null;
QString value = QString::null;
/** walk over the tokens; the checks below are not exclusive, a token consumed as attribute value is tested by the following checks, too */
for ( QStringList::Iterator it = elements.begin(); it != elements.end(); ++it )
{
if ( *it == "name" )
{
++it; if ( it != elements.end() ) key = *it; else break;
}
if ( *it == "value" )
{
/** a "value" token at the very end yields an empty (but not null) value */
++it; if ( it != elements.end() ) value = *it; else
{
value = ""; break;
}
}
if ( *it == "checked" )
checked = true;
if ( *it == "type" )
{
++it;
if ( it == elements.end() ) break;
isCheckable = *it == "radio" || *it == "checkbox";
isSubmit = *it == "submit";
}
}
/** radio buttons and checkboxes only if checked, submit buttons only if their value is "submit"; name and value must both be present */
if (( !isCheckable || checked ) && ( !isSubmit || value == "submit" ) && value != QString::null && key != QString::null )
{
keyValues[key] = value;
}
}
/** NOTE(review): the name has to follow "name=" directly without a quote character, a quoted name does not match this expression */
QRegExp reSelect( "<select name=([^ >\"]+).*</select>" );
/** minimal matching, so that a match ends at the first "</select>" */
reSelect.setMinimal( true );
QRegExp reOption( "<option[^>]+>" );
int pos3 = 0;
while (( pos3 = htmlCode.find( reSelect, pos3 + 1 ) ) > 5 )
{
QString key = reSelect.cap( 1 );
QString sub = reSelect.cap( 0 );
int pos2 = 0;
/** scan the options inside this select block */
while (( pos2 = sub.find( reOption, pos2 + 1 ) ) > 5 )
{
QStringList elements = QStringList::split( reSplit, reOption.cap( 0 ) );
bool selected = false;
QString value = QString::null;
for ( QStringList::Iterator it = elements.begin(); it != elements.end(); ++it )
{
if ( *it == "value" )
{
++it; if ( it != elements.end() ) value = *it; else
{
value = ""; break;
}
}
if ( *it == "selected" )
selected = true;
}
/** only an option marked as selected determines the value of the select field */
if ( selected && value != QString::null && key != QString::null )
{
keyValues[key] = value;
}
}
}
return keyValues;
}
/**
 * Build a URL string from a prefix and a set of form values.
 * The pairs are appended as "key=value"; the first one is introduced
 * by '?', all following ones by '&'. Keys and values are appended as
 * they are, no escaping is done here.
 * @param prefix    address the query part is appended to
 * @param keyValues form field names and their values
 * @return prefix followed by the query part (only the prefix if the map is empty)
 */
QString WebQueryGoogleScholar::formFieldsToUrl( const QString &prefix, const QMap<QString, QString> &keyValues )
{
    QString url = prefix;
    QString separator = "?";
    QMap<QString, QString>::ConstIterator field = keyValues.begin();
    while ( field != keyValues.end() )
    {
        url.append( separator );
        url.append( field.key() + "=" + field.data() );
        /** every pair after the first one is joined with an ampersand */
        separator = "&";
        ++field;
    }
    return url;
}
}
#include "webquerygooglescholar.moc"