You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kbibtex/src/webquerygooglescholar.cpp

470 lines
18 KiB

/***************************************************************************
* Copyright (C) 2004-2009 by Thomas Fischer *
* fischer@unix-ag.uni-kl.de *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#include <tqfile.h>
#include <tqapplication.h>
#include <tqregexp.h>
#include <tqtimer.h>
#include <tqmap.h>
#include <tqspinbox.h>
#include <tdeconfig.h>
#include <klineedit.h>
#include <tdelocale.h>
#include <kdebug.h>
#include <tdemessagebox.h>
#include <tdeio/job.h>
#include <dcopref.h>
#include "settings.h"
#include "webquerygooglescholar.h"
namespace KBibTeX
{
/**
 * Build the query widget for Google Scholar and pre-fill the search line
 * with the term stored under the "GoogleScholar" key in the settings.
 */
WebQueryGoogleScholarWidget::WebQueryGoogleScholarWidget( TQWidget *parent, const char *name )
        : WebQueryWidget( parent, name )
{
    init();

    /** fetch the stored search term; a null string is replaced by an empty one */
    TQString storedTerm = Settings::self()->getWebQueryDefault( "GoogleScholar" );
    if ( storedTerm == TQString::null )
        storedTerm = "";

    /** show the term and let the widget react to the new text */
    lineEditQuery->setText( storedTerm );
    slotTextChanged( storedTerm, true );
}
/**
 * Create the Google Scholar web query.
 * Starts without a transfer job or buffer, sets up a BibTeX importer
 * that ignores comments and creates the query widget below @p parent.
 */
WebQueryGoogleScholar::WebQueryGoogleScholar( TQWidget* parent )
        : WebQuery( parent ), m_transferJob( NULL ), m_transferJobBuffer( NULL )
{
    /** importer used for the BibTeX data delivered by Google Scholar */
    BibTeX::FileImporterBibTeX *bibtexImporter = new BibTeX::FileImporterBibTeX( FALSE );
    bibtexImporter->setIgnoreComments( TRUE );
    m_importer = bibtexImporter;

    /** widget holding the search line and the hit limit */
    m_widget = new WebQueryGoogleScholarWidget( parent );
}
/**
 * Release the objects created in the constructor:
 * first the query widget, then the BibTeX importer.
 */
WebQueryGoogleScholar::~WebQueryGoogleScholar()
{
    /** query widget */
    delete m_widget;

    /** BibTeX importer */
    delete m_importer;
}
/**
 * @return the translated display name of this search engine
 */
TQString WebQueryGoogleScholar::title()
{
    TQString engineName = i18n( "Google Scholar" );
    return engineName;
}
/**
 * @return the translated label for the link to the engine's "about" page
 */
TQString WebQueryGoogleScholar::disclaimer()
{
    TQString linkLabel = i18n( "About Google Scholar" );
    return linkLabel;
}
/**
 * @return the address of Google Scholar's "about" page
 */
TQString WebQueryGoogleScholar::disclaimerURL()
{
    return TQString( "http://scholar.google.com/intl/en/scholar/about.html" );
}
/**
 * @return the query widget created in the constructor
 */
WebQueryWidget *WebQueryGoogleScholar::widget()
{
    WebQueryWidget *queryWidget = m_widget;
    return queryWidget;
}
/**
 * Start a new search on Google Scholar.
 *
 * Stores the entered search term in the settings, converts it into a
 * percent-escaped form and launches the first of four chained HTTP
 * requests (start page, preferences page, saving preferences, result
 * overview). Each request's result() signal is connected to the slot
 * that evaluates it and launches the following request.
 *
 * The running job is kept in the member m_transferJob (not in a local
 * variable) so that cancelQuery() is able to kill it.
 */
void WebQueryGoogleScholar::query()
{
    WebQuery::query();

    /** save search term in settings */
    Settings *settings = Settings::self();
    settings->setWebQueryDefault( "GoogleScholar", m_widget->lineEditQuery->text() );

    /** generate web-safe search term */
    m_searchTerm = m_widget->lineEditQuery->text().stripWhiteSpace().replace( '$', "" );
    m_searchTerm = m_searchTerm.replace( "%", "%25" ).replace( "+", "%2B" ).replace( " ", "%20" ).replace( "#", "%23" ).replace( "&", "%26" ).replace( "?", "%3F" );
    if ( m_searchTerm.isEmpty() )
    {
        setEndSearch( WebQuery::statusInvalidQuery );
        return;
    }

    /** initialize variables */
    m_abort = false;
    m_numberOfResults = m_widget->spinBoxMaxHits->value();
    setNumStages( m_numberOfResults + 5 );

    /** reset KDE configuration for cookie handling */
    readAndChangeConfig();

    /** prepare HTTP request (buffer, signals, job) */
    m_transferJobBuffer = new TQBuffer();
    m_transferJobBuffer->open( IO_WriteOnly );
    m_transferJob = TDEIO::get( KURL( "http://scholar.google.com/scholar_ncr" ), false, false );
    connect( m_transferJob, SIGNAL( data( TDEIO::Job *, const TQByteArray & ) ), this, SLOT( slotData( TDEIO::Job *, const TQByteArray & ) ) );
    connect( m_transferJob, SIGNAL( result( TDEIO::Job * ) ), this, SLOT( slotFinishedStartpage( TDEIO::Job * ) ) );
}

/**
 * Abort the search on user request.
 *
 * Sets the abort flag checked by all result slots, kills the transfer
 * job currently in flight (if there is one) and ends the search with
 * an error status.
 */
void WebQueryGoogleScholar::cancelQuery()
{
    /** user aborted search */
    m_abort = true;

    /**
     * Non-quiet kill: according to the KIO/TDEIO job documentation the job
     * still emits result(), so the connected slot runs, notices m_abort
     * and restores the cookie configuration. That slot also resets
     * m_transferJob.
     */
    if ( m_transferJob != NULL )
        m_transferJob->kill( false );

    setEndSearch( WebQuery::statusError );
}

/**
 * Result slot for the start page request.
 * The page content is discarded; on success the preferences page is requested.
 *
 * @param job the finished transfer job, used to query the error code
 */
void WebQueryGoogleScholar::slotFinishedStartpage( TDEIO::Job *job )
{
    /** a job deletes itself after reporting its result; never keep a pointer to it */
    m_transferJob = NULL;

    /** close and delete buffer (content does not matter) */
    m_transferJobBuffer->close();
    delete m_transferJobBuffer;
    m_transferJobBuffer = NULL;

    /** if aborted in the mean time, clean up everything */
    if ( m_abort )
    {
        restoreConfig();
        return;
    }

    /** error occurred */
    if ( job->error() != 0 )
    {
        restoreConfig();
        kdDebug() << "Error in slotFinishedStartpage: " << job->error() << endl;
        setEndSearch( statusError );
        return;
    }

    /** update progress bar */
    enterNextStage();

    /** prepare next HTTP request for preferences page (buffer, signals, job) */
    m_transferJobBuffer = new TQBuffer();
    m_transferJobBuffer->open( IO_WriteOnly );
    m_transferJob = TDEIO::get( KURL( "http://scholar.google.com/scholar_preferences?hl=en" ), false, false );
    connect( m_transferJob, SIGNAL( data( TDEIO::Job *, const TQByteArray & ) ), this, SLOT( slotData( TDEIO::Job *, const TQByteArray & ) ) );
    connect( m_transferJob, SIGNAL( result( TDEIO::Job * ) ), this, SLOT( slotFinishedLoadingSettings( TDEIO::Job * ) ) );
}

/**
 * Result slot for the preferences page request.
 * Extracts the form fields of the page, overrides those needed to get
 * BibTeX links and the requested number of hits, and submits the form.
 *
 * @param job the finished transfer job, used to query the error code
 */
void WebQueryGoogleScholar::slotFinishedLoadingSettings( TDEIO::Job *job )
{
    /** a job deletes itself after reporting its result; never keep a pointer to it */
    m_transferJob = NULL;

    /** close buffer, keep its content as text, then delete it */
    m_transferJobBuffer->close();
    TQString htmlCode = textFromBuffer( m_transferJobBuffer );
    delete m_transferJobBuffer;
    m_transferJobBuffer = NULL;

    /** if aborted in the mean time, clean up everything */
    if ( m_abort )
    {
        restoreConfig();
        return;
    }

    /** error occurred */
    if ( job->error() != 0 )
    {
        restoreConfig();
        kdDebug() << "Error in slotFinishedLoadingSettings: " << job->error() << endl;
        setEndSearch( statusError );
        return;
    }

    /** update progress bar */
    enterNextStage();

    /** parse html code to get form values */
    TQMap<TQString, TQString> keyValues = evalFormFields( htmlCode );

    /** set form values for BibTeX search */
    keyValues["scis"] = "yes";
    keyValues["scisf"] = "4";
    keyValues["submit"] = "Save+Preferences";
    keyValues["num"] = TQString::number( m_numberOfResults );

    /** prepare next HTTP request to submit preferences (buffer, signals, job) */
    KURL nextUrl( formFieldsToUrl( "http://scholar.google.com/scholar_setprefs", keyValues ) );
    m_transferJobBuffer = new TQBuffer();
    m_transferJobBuffer->open( IO_WriteOnly );
    m_transferJob = TDEIO::get( nextUrl, false, false );
    connect( m_transferJob, SIGNAL( data( TDEIO::Job *, const TQByteArray & ) ), this, SLOT( slotData( TDEIO::Job *, const TQByteArray & ) ) );
    connect( m_transferJob, SIGNAL( result( TDEIO::Job * ) ), this, SLOT( slotFinishedSavingSettings( TDEIO::Job * ) ) );
}

/**
 * Result slot for the request that saved the preferences.
 * Extracts the form fields of the returned page, adds the search term
 * and the number of hits, and requests the result overview.
 *
 * @param job the finished transfer job, used to query the error code
 */
void WebQueryGoogleScholar::slotFinishedSavingSettings( TDEIO::Job *job )
{
    /** a job deletes itself after reporting its result; never keep a pointer to it */
    m_transferJob = NULL;

    /** close buffer, keep its content as text, then delete it */
    m_transferJobBuffer->close();
    TQString htmlCode = textFromBuffer( m_transferJobBuffer );
    delete m_transferJobBuffer;
    m_transferJobBuffer = NULL;

    /** if aborted in the mean time, clean up everything */
    if ( m_abort )
    {
        restoreConfig();
        return;
    }

    /** error occurred */
    if ( job->error() != 0 )
    {
        restoreConfig();
        kdDebug() << "Error in slotFinishedSavingSettings: " << job->error() << endl;
        setEndSearch( statusError );
        return;
    }

    /** update progress bar */
    enterNextStage();

    /** parse html code to get form values */
    TQMap<TQString, TQString> keyValues = evalFormFields( htmlCode );

    /** set form values for search */
    keyValues["q"] = m_searchTerm;
    keyValues["num"] = TQString::number( m_numberOfResults );

    /** prepare next HTTP request for actual search (buffer, signals, job) */
    KURL nextUrl( formFieldsToUrl( "http://scholar.google.com/scholar", keyValues ) );
    m_transferJobBuffer = new TQBuffer();
    m_transferJobBuffer->open( IO_WriteOnly );
    m_transferJob = TDEIO::get( nextUrl, false, false );
    connect( m_transferJob, SIGNAL( data( TDEIO::Job *, const TQByteArray & ) ), this, SLOT( slotData( TDEIO::Job *, const TQByteArray & ) ) );
    connect( m_transferJob, SIGNAL( result( TDEIO::Job * ) ), this, SLOT( slotFinishedReceivingResultOverview( TDEIO::Job * ) ) );
}

/**
 * Result slot for the result overview request.
 * Searches the page for links to BibTeX files, downloads each of them
 * and emits foundEntry() with a copy of the first element of every
 * downloaded file if that element is an entry. Finally restores the
 * cookie configuration and ends the search.
 *
 * @param job the finished transfer job, used to query the error code
 */
void WebQueryGoogleScholar::slotFinishedReceivingResultOverview( TDEIO::Job *job )
{
    /**
     * a job deletes itself after reporting its result; never keep a pointer to it.
     * No further job is stored in m_transferJob from here on.
     */
    m_transferJob = NULL;

    /** close buffer, keep its content as text, then delete it */
    m_transferJobBuffer->close();
    TQString htmlCode = textFromBuffer( m_transferJobBuffer );
    delete m_transferJobBuffer;
    m_transferJobBuffer = NULL;

    /** if aborted in the mean time, clean up everything */
    if ( m_abort )
    {
        restoreConfig();
        return;
    }

    /** error occurred */
    if ( job->error() != 0 )
    {
        restoreConfig();
        kdDebug() << "Error in slotFinishedReceivingResultOverview: " << job->error() << endl;
        setEndSearch( statusError );
        return;
    }

    /** update progress bar */
    enterNextStage();

    /**
     * find all links to BibTeX files in result page
     * NOTE(review): this loop checks m_aborted, which is not written anywhere in
     * this file (cancelQuery() sets m_abort) - presumably maintained by the
     * WebQuery base class; verify.
     */
    TQRegExp reBibUrl( "/scholar.bib[^ \">]+" );
    int pos = 0;
    while ( !m_aborted && ( pos = htmlCode.find( reBibUrl, pos + 1 ) ) > 0 )
    {
        /** download individual BibTeX file for each search hit */
        KURL bibUrl( "http://scholar.google.com" + reBibUrl.cap( 0 ).replace( "&amp;", "&" ) );
        BibTeX::File *tmpBibFile = downloadBibTeXFile( bibUrl );

        /** update progress bar */
        enterNextStage();

        /** parse, evaluate and store first BibTeX entry */
        if ( tmpBibFile != NULL )
        {
            BibTeX::File::ElementList::iterator it = tmpBibFile->begin();
            if ( it != tmpBibFile->end() )
            {
                BibTeX::Entry *entry = dynamic_cast<BibTeX::Entry*>( *it );
                if ( entry != NULL )
                    emit foundEntry( new BibTeX::Entry( entry ), false );
            }
            delete tmpBibFile;
        }
    }

    /** restore old cookie configuration */
    restoreConfig();

    /** set result status */
    if ( m_aborted )
        setEndSearch( statusAborted );
    else
        setEndSearch( statusSuccess );
}
/**
 * Remember the current cookie policy from "kcookiejarrc" and replace it
 * with one that accepts cookies, as required for the search.
 *
 * The original values are kept in the m_original* members so that
 * restoreConfig() can write them back. Per-domain advice is kept in
 * m_originalCookieMap; this map is emptied first so that it reflects
 * the configuration as it is now and not entries collected during an
 * earlier search.
 */
void WebQueryGoogleScholar::readAndChangeConfig()
{
    TDEConfig cfg( "kcookiejarrc" );
    cfg.setGroup( "Cookie Policy" );

    /** remember the current settings */
    m_originalEnableCookies = cfg.readBoolEntry( "Cookies", true );
    m_originalSessionCookies = cfg.readBoolEntry( "AcceptSessionCookies", true );
    TQStringList cookieSettingsList = TQStringList::split( ',', cfg.readEntry( "CookieDomainAdvice", "" ) );
    m_originalCookieGlobalAdvice = cfg.readEntry( "CookieGlobalAdvice", "Accept" );

    /** drop entries of previous searches, then store the current "domain:advice" pairs */
    m_originalCookieMap.clear();
    for ( TQStringList::Iterator it = cookieSettingsList.begin(); it != cookieSettingsList.end(); ++it )
    {
        TQStringList keyValue = TQStringList::split( ':', *it );
        if ( keyValue.size() == 2 )
        {
            m_originalCookieMap[keyValue[0]] = keyValue[1];
        }
    }

    /** enable cookies globally */
    cfg.writeEntry( "Cookies", true );
    cfg.writeEntry( "CookieGlobalAdvice", "Accept" );
    cfg.writeEntry( "AcceptSessionCookies", true );

    /** keep the per-domain advice, but force "Accept" for domains containing ".google." */
    cookieSettingsList.clear();
    for ( TQMap<TQString, TQString>::Iterator it = m_originalCookieMap.begin(); it != m_originalCookieMap.end(); ++it )
    {
        TQString value = it.key().contains( ".google." ) ? "Accept" : it.data();
        cookieSettingsList << it.key() + ":" + value;
    }
    cfg.writeEntry( "CookieDomainAdvice", cookieSettingsList.join( "," ) );
    cfg.sync();

    /** ask the cookie jar service to pick up the changed policy */
    ( void )DCOPRef( "kded", "kcookiejar" ).send( "reloadPolicy" );
}
void WebQueryGoogleScholar::restoreConfig()
{
TDEConfig cfg( "kcookiejarrc" );
cfg.setGroup( "Cookie Policy" );
cfg.writeEntry( "CookieGlobalAdvice", m_originalCookieGlobalAdvice );
cfg.writeEntry( "Cookies", m_originalEnableCookies );
cfg.writeEntry( "AcceptSessionCookies", m_originalSessionCookies );
TQStringList cookieSettingsList;
for ( TQMap<TQString, TQString>::Iterator it = m_originalCookieMap.begin(); it != m_originalCookieMap.end(); ++it )
cookieSettingsList << it.key() + ":" + it.data();
cfg.writeEntry( "CookieDomainAdvice", cookieSettingsList.join( "," ) );
cfg.sync();
if ( !m_originalEnableCookies )
( void )DCOPRef( "kded", "kcookiejar" ).send( "shutdown" );
else
( void )DCOPRef( "kded", "kcookiejar" ).send( "reloadPolicy" );
}
/**
 * Read the complete content of a buffer as text.
 * The buffer is opened for reading, read line by line and closed again.
 * The lines are concatenated directly, i.e. nothing is inserted between them.
 *
 * @param buffer buffer to read from; must not be NULL
 * @return the concatenated lines, an empty string if there are none
 */
TQString WebQueryGoogleScholar::textFromBuffer( TQBuffer *buffer )
{
    TQString text( "" );

    buffer->open( IO_ReadOnly );
    TQTextStream stream( buffer );
    for ( ; !stream.atEnd(); )
    {
        text += stream.readLine();
    }
    buffer->close();

    return text;
}
/**
 * Extract form field names and values from a piece of HTML code.
 *
 * Two kinds of elements are evaluated: <input ...> tags and
 * <select name=...> ... </select> blocks with their <option ...> tags.
 * This is a token-based scan and not an HTML parser: each tag is split
 * at runs of the characters '<', '>', '=', '"' and space, and the
 * resulting tokens are walked in order.
 *
 * @param htmlCode HTML code to search
 * @return map from field name to field value; later matches with the
 *         same name overwrite earlier ones
 */
TQMap <TQString, TQString> WebQueryGoogleScholar::evalFormFields( const TQString &htmlCode )
{
TQMap<TQString, TQString> keyValues;
/** matches a single input tag up to its closing '>' */
TQRegExp reInput( "<input[^>]+>" );
/** separators used to cut a tag into tokens */
TQRegExp reSplit( "[<>=\" ]+" );
int pos = 0;
/** each search starts one character after the previous match (the first at index 1); the loop ends as soon as a search returns a position of 5 or less, which includes "not found" */
while (( pos = htmlCode.find( reInput, pos + 1 ) ) > 5 )
{
TQStringList elements = TQStringList::split( reSplit, reInput.cap( 0 ) );
bool checked = false;
bool isCheckable = false;
bool isSubmit = false;
TQString key = TQString::null;
TQString value = TQString::null;
/** walk the tokens; note that a branch below may advance the iterator itself and that the following checks then see the token it advanced to */
for ( TQStringList::Iterator it = elements.begin(); it != elements.end(); ++it )
{
/** the token following "name" is the field's key */
if ( *it == "name" )
{
++it; if ( it != elements.end() ) key = *it; else break;
}
/** the token following "value" is the field's value; a trailing "value" without successor yields an empty value */
if ( *it == "value" )
{
++it; if ( it != elements.end() ) value = *it; else
{
value = ""; break;
}
}
if ( *it == "checked" )
checked = true;
/** the token following "type" tells whether this is a radio button, checkbox or submit button */
if ( *it == "type" )
{
++it;
if ( it == elements.end() ) break;
isCheckable = *it == "radio" || *it == "checkbox";
isSubmit = *it == "submit";
}
}
/** store the field only if it has both name and value; radio buttons and checkboxes must be checked, submit buttons must carry the value "submit" */
if (( !isCheckable || checked ) && ( !isSubmit || value == "submit" ) && value != TQString::null && key != TQString::null )
{
keyValues[key] = value;
}
}
/** select blocks: the name is captured from the text right after "<select name=" and, per this pattern, must not start with a quote character */
TQRegExp reSelect( "<select name=([^ >\"]+).*</select>" );
/** minimal matching, so that a match ends at the first "</select>" */
reSelect.setMinimal( true );
TQRegExp reOption( "<option[^>]+>" );
int pos3 = 0;
/** same search pattern as for the input tags above (start at previous position + 1, stop at a result of 5 or less) */
while (( pos3 = htmlCode.find( reSelect, pos3 + 1 ) ) > 5 )
{
TQString key = reSelect.cap( 1 );
TQString sub = reSelect.cap( 0 );
int pos2 = 0;
/** look at every option tag inside the matched select block; positions are relative to the block's text */
while (( pos2 = sub.find( reOption, pos2 + 1 ) ) > 5 )
{
TQStringList elements = TQStringList::split( reSplit, reOption.cap( 0 ) );
bool selected = false;
TQString value = TQString::null;
for ( TQStringList::Iterator it = elements.begin(); it != elements.end(); ++it )
{
/** the token following "value" is the option's value */
if ( *it == "value" )
{
++it; if ( it != elements.end() ) value = *it; else
{
value = ""; break;
}
}
if ( *it == "selected" )
selected = true;
}
/** only the option marked as selected determines the field's value */
if ( selected && value != TQString::null && key != TQString::null )
{
keyValues[key] = value;
}
}
}
return keyValues;
}
/**
 * Build a URL with a query string from a set of form fields.
 * The first field is introduced by '?', every further field by '&'.
 * Keys and values are appended as they are; no escaping is done here.
 *
 * @param prefix address the query string is appended to
 * @param keyValues form fields (name -> value)
 * @return @p prefix followed by "key=value" pairs; just @p prefix if there are no fields
 */
TQString WebQueryGoogleScholar::formFieldsToUrl( const TQString &prefix, const TQMap<TQString, TQString> &keyValues )
{
    TQString url = prefix;
    bool isFirstField = true;

    TQMap<TQString, TQString>::ConstIterator field = keyValues.begin();
    while ( field != keyValues.end() )
    {
        url.append( isFirstField ? "?" : "&" );
        isFirstField = false;
        url.append( field.key() + "=" + field.data() );
        ++field;
    }

    return url;
}
}
#include "webquerygooglescholar.moc"