You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

569 lines
24 KiB

* Copyright (C) 2004-2009 by Thomas Fischer *
* *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
#include <qfile.h>
#include <qbuffer.h>
#include <qspinbox.h>
#include <klocale.h>
#include <klineedit.h>
#include <kmessagebox.h>
#include <kurl.h>
#include <fileimporterbibtex.h>
#include <encoderxml.h>
#include <settings.h>
#include "webqueryarxiv.h"
namespace KBibTeX
// Builds the arXiv query widget on top of WebQueryWidget and pre-fills the
// query line edit with the text stored under the "ArXiv" key of the settings.
// NOTE(review): no braces are visible around this body in the text at hand
// (presumably lost in extraction), so further statements may be missing too;
// confirm against the upstream file.
WebQueryArXivWidget::WebQueryArXivWidget( QWidget *parent, const char *name )
: WebQueryWidget( parent, name )
Settings *settings = Settings::self();
QString value = settings->getWebQueryDefault( "ArXiv" );
// Turn a null QString into an empty one before it reaches the line edit.
value = value == QString::null ? "" : value;
lineEditQuery->setText( value );
// Run the text-changed handler once for the restored value.
slotTextChanged( value, true );
// Constructs the arXiv web query. The initializer list sets m_arXivServer to
// an empty string and initialises six journal-reference patterns
// (m_jourRef1 .. m_jourRef6) followed by three fallback patterns for the
// journal title, a four-digit year starting with 18, 19 or 20, and a page
// range. The body then creates the BibTeX importer and the query widget.
// NOTE(review): braces and the closing delimiters of the "examples" comments
// are not visible in this text (presumably lost in extraction); restore them
// from the upstream file before compiling.
// NOTE(review): m_arXivServer is empty here although arXivResult() later
// inserts it as the host of an "http://%2/abs/%1" URL; a host name was
// presumably lost, confirm.
WebQueryArXiv::WebQueryArXiv( QWidget* parent )
: WebQuery( parent ), m_arXivServer( "" ),
/** examples:
Journal of Inefficient Algorithms 5 (2003) 35-39
Astrophys.J. 578 (2002) L103-L106
New J. Phys. 10 (2008) 033023
Physics Letters A 297 (2002) 4-8
Appl.Phys. B75 (2002) 655-665
JHEP 0611 (2006) 045
m_jourRef1( "^([a-zA-Z. ]+[a-zA-Z.])\\s*(\\d+)\\s+\\((\\d{4})\\)\\s+([0-9A-Z]+)(-([0-9A-Z]+))?$" ),
/** examples:
Journal of Inefficient Algorithms, Vol. 93, No. 2 (2009), pp. 42-51
International Journal of Quantum Information, Vol. 1, No. 4 (2003) 427-441
Stud. Hist. Phil. Mod. Phys., Vol 33 no 3 (2003), pp. 441-468
m_jourRef2( "^([a-zA-Z. ]+[a-zA-Z.]),\\s+Vol\\.?\\s+(\\d+)[,]?\\s+No\\.?\\s+(\\d+)\\s+\\((\\d{4})\\)[,]?\\s+(pp\\.\\s+)?(\\d+)(-(\\d+))?$" ),
/** examples:
Journal of Inefficient Algorithms, volume 4, number 1, pp. 12-21, 2008
Scientometrics, volume 69, number 3, pp. 669-687, 2006
m_jourRef3( "^([a-zA-Z. ]+),\\s+volume\\s+(\\d+),\\s+number\\s+(\\d+),\\s+pp\\.\\s+(\\d+)(-(\\d+))?,\\s+(\\d{4})$" ),
/** examples:
Journal of Inefficient Algorithms 4(1): 101-122, 2010
Europhys. Letters 70:1-7 (2005)
Journal of Conflict Resolution 51(1): 58 - 88 (2007)
Journal of Artificial Intelligence Research (JAIR), 9:247-293
m_jourRef4( "^([a-zA-Z. ()]+)[,]?\\s*(\\d+)(\\((\\d+)\\))?:\\s*(\\d+)(\\s*-\\s*(\\d+))?(,\\s*(\\d{4})|\\s+\\((\\d{4})\\))?$" ),
/** examples:
Journal of Inefficient Algorithms vol. 31, 4 2000
Phys. Rev. A 71, 032339 (2005)
Phys. Rev. Lett. 91, 027901 (2003)
Phys. Rev. A 78, 013620 (2008)
Phys. Rev. E 62, 1842 (2000)
Rev. Mod. Phys. 79, 555 (2007)
J. Math. Phys. 49, 032105 (2008)
New J. Phys. 8, 58 (2006)
Phys. Rev. Lett. 91, 217905 (2003).
Physical Review B vol. 66, 161320(R) (2002)
??? Phys. Rev. Lett. 89, 057902(1--4) (2002).
??? J. Mod. Opt., 54, 2211 (2007)
m_jourRef5( "^([a-zA-Z. ]+)\\s+(vol\\.\\s+)?(\\d+),\\s+(\\d+)(\\([A-Z]+\\))?\\s+\\((\\d{4})\\)[.]?$" ),
/** examples:
Journal of Inefficient Algorithms, 11(2) (1999) 42-55
Learned Publishing, 20(1) (January 2007) 16-22
m_jourRef6( "^([a-zA-Z. ]+),\\s+(\\d+)\\((\\d+)\\)\\s+(\\(([A-Za-z]+\\s+)?(\\d{4})\\))?\\s+(\\d+)(-(\\d+))?$" ),
m_reJour( "^([a-zA-Z. ]+)" ), m_reYear( "\\b((18|19|20)\\d{2})\\b" ), m_rePages( "\\b([1-9]\\d{0,2})\\s*[-]+\\s*([1-9]\\d{0,2})\\b" )
// Create the BibTeX importer and tell it to ignore comments.
m_importer = new BibTeX::FileImporterBibTeX( FALSE );
m_importer->setIgnoreComments( TRUE );
m_widget = new WebQueryArXivWidget( parent );
// NOTE(review): the two deletes below presumably form the destructor body;
// its signature line is not visible in this text. As written they would
// destroy the objects created just above, so confirm against upstream.
delete m_widget;
delete m_importer;
/**
 * Returns the translated display name of this web query.
 * The function body braces were missing in the text at hand and are restored here.
 */
QString WebQueryArXiv::title()
{
    return i18n( "arXiv" );
}
/**
 * Returns the translated one-line disclaimer text for this web query.
 * The function body braces were missing in the text at hand and are restored here.
 */
QString WebQueryArXiv::disclaimer()
{
    return i18n( "arXiv is an archive for preprints" );
}
/**
 * Returns the URL that accompanies the disclaimer text.
 * The function body braces were missing in the text at hand and are restored here.
 * NOTE(review): the returned string is empty; a link to the arXiv site was
 * presumably intended here, so confirm against the upstream file.
 */
QString WebQueryArXiv::disclaimerURL()
{
    return "";
}
/**
 * Returns the query widget created in the constructor. The returned pointer
 * is the m_widget member itself; no copy is made.
 * The function body braces were missing in the text at hand and are restored here.
 */
WebQueryWidget *WebQueryArXiv::widget()
{
    return m_widget;
}
// Cancellation hook for the query (per its name). Only the disabled job-kill
// below is visible in this text.
// NOTE(review): braces and possibly further statements are not visible here
// (presumably lost in extraction); confirm the real body against upstream.
void WebQueryArXiv::cancelQuery()
// FIXME: The following code crashes KBibTeX:
// if ( m_currentJob != NULL ) m_currentJob->kill( FALSE );
// Starts a search: stores the current query text under the "ArXiv" settings
// key, reads the maximum number of hits from the spin box, builds the search
// expression from the whitespace-separated words and issues a stored GET whose
// result is delivered to unlockJob() and arXivResult().
// NOTE(review): braces are not visible in this text (presumably lost in
// extraction); confirm statement grouping against upstream.
void WebQueryArXiv::query()
Settings *settings = Settings::self();
settings->setWebQueryDefault( "ArXiv", m_widget->lineEditQuery->text() );
m_numberOfResults = m_widget->spinBoxMaxHits->value();
// One stage per requested hit plus one more (presumably the search request itself).
setNumStages( m_numberOfResults + 1 );
// Trim the input and drop every '$' before splitting on runs of whitespace.
QString searchTerm = m_widget->lineEditQuery->text().stripWhiteSpace().replace( '$', "" );
QStringList queryWords = QStringList::split( QRegExp( "\\s+" ), searchTerm );
if ( searchTerm.isEmpty() || queryWords.size() == 0 )
// NOTE(review): a return presumably follows this call; without it the
// unsigned expression "queryWords.size() - 1" below would wrap around for
// an empty list. Confirm against upstream.
setEndSearch( WebQuery::statusInvalidQuery );
QString query;
// Produces "AND w0 AND w1 ... wLast": every word except the last is prefixed with "AND ".
for ( unsigned int i = 0; i < queryWords.size() - 1; ++i )
query = query.append( "AND " ).append( queryWords[i] ).append( " " );
query.append( queryWords[queryWords.size()-1] );
// "%" is escaped first so that the percent signs introduced by the later
// replacements are not escaped a second time.
// NOTE(review): the URL template is an empty string here, so the two arg()
// calls have no placeholder to fill; the search URL was presumably lost in
// extraction and must be restored from upstream.
KURL url = KURL( QString( "" ).arg( m_numberOfResults ).arg( query.replace( "%", "%25" ).replace( "+", "%2B" ).replace( " ", "%20" ).replace( "#", "%23" ).replace( "&", "%26" ).replace( "?", "%3F" ) ) );
KIO::Job *job = KIO::storedGet( url, FALSE, FALSE );
// Both slots receive the same result signal.
connect( job, SIGNAL( result( KIO::Job * ) ), this, SLOT( unlockJob( KIO::Job * ) ) );
connect( job, SIGNAL( result( KIO::Job * ) ), this, SLOT( arXivResult( KIO::Job * ) ) );
// Slot connected to each job's result signal alongside the actual result
// handler; the job argument is unnamed and therefore unused.
// NOTE(review): no body is visible in this text (presumably lost in
// extraction); confirm against upstream.
void WebQueryArXiv::unlockJob( KIO::Job * )
// Handles the result of the search request started by query(). When the job
// finished without error and the query was not aborted, the payload is scanned
// for "arXiv:" markers; each one yields an abstract-page URL that is queued in
// m_urls. Afterwards the search is either ended or the first queued abstract
// is fetched.
// NOTE(review): braces and else lines are not visible in this text
// (presumably lost in extraction); the grouping described in the notes below
// is inferred and must be confirmed against upstream.
void WebQueryArXiv::arXivResult( KIO::Job *job )
if ( job->error() == 0 && !m_aborted )
// NOTE(review): this line and the close() line below are not valid C++ as
// written; the buffer's open calls were presumably lost in extraction.
QBuffer data; IO_WriteOnly );
// NOTE(review): the dynamic_cast result is dereferenced without a null check.
data.writeBlock( dynamic_cast<KIO::StoredTransferJob*>( job )->data() );
data.close(); IO_ReadOnly );
QTextStream ts( &data );
// NOTE(review): the right-hand side is missing; presumably the stream's
// content was read here. Confirm against upstream.
QString result =;
int p = -1;
m_totalHits = 0;
m_receivedHits = 0;
// Walk through every "arXiv:" occurrence until enough hits were collected.
// NOTE(review): no increment of m_totalHits is visible inside this loop;
// presumably lost in extraction.
while ( !m_aborted && ( int ) m_totalHits < m_numberOfResults && ( p = result.find( "arXiv:", p + 1 ) ) >= 0 )
int p2 = result.find( "<", p + 2 );
// The identifier is the text between "arXiv:" (6 characters) and the next "<".
QString hit = result.mid( p + 6, p2 - p - 6 );
p = p2 + 1;
// %1 receives the identifier, %2 the server name.
KURL url = KURL( QString( "http://%2/abs/%1" ).arg( hit ).arg( m_arXivServer ) );
m_urls.append( url );
// No hits at all: the search is finished successfully.
if ( m_totalHits == 0 )
setEndSearch( WebQuery::statusSuccess );
// Otherwise take the first queued abstract URL and fetch it.
else if ( !m_urls.isEmpty() )
KURL url = m_urls.first();
m_urls.remove( url );
fetchFromAbstract( url );
// NOTE(review): presumably the else branch of the outer error/abort check.
setEndSearch( WebQuery::statusError );
// Starts a stored GET for one abstract page. m_aborted is reset to false
// first, and the job's result is delivered to unlockJob() and
// arXivAbstractResult().
// NOTE(review): braces are not visible in this text (presumably lost in
// extraction).
void WebQueryArXiv::fetchFromAbstract( const KURL &abstractURL )
m_aborted = false;
KIO::Job *job = KIO::storedGet( abstractURL, FALSE, FALSE );
connect( job, SIGNAL( result( KIO::Job * ) ), this, SLOT( unlockJob( KIO::Job * ) ) );
connect( job, SIGNAL( result( KIO::Job * ) ), this, SLOT( arXivAbstractResult( KIO::Job * ) ) );
// Handles the result of one abstract-page request. The page text is scanned
// with plain substring searches for the identifier, the "cite as" key, the
// abstract, authors, title, submission date, DOI, keywords, ACM classes,
// version links, report number and journal reference. Everything found is
// stored in a new BibTeX entry, which is emitted through foundEntry(). Finally
// the next queued abstract is requested or the search is ended.
// NOTE(review): braces, else lines and several tokens are not visible in this
// text (presumably lost in extraction). The grouping described in the notes
// below is inferred; confirm every note against the upstream file.
void WebQueryArXiv::arXivAbstractResult( KIO::Job *job )
if ( job->error() == 0 && !m_aborted )
// NOTE(review): this line and the close() line below are not valid C++ as
// written; the buffer's open calls were presumably lost in extraction.
QBuffer data; IO_WriteOnly );
// NOTE(review): the dynamic_cast result is dereferenced without a null check.
data.writeBlock( dynamic_cast<KIO::StoredTransferJob*>( job )->data() );
data.close(); IO_ReadOnly );
QTextStream ts( &data );
// NOTE(review): decode() has no argument here; presumably the stream's
// content was passed in.
QString result = BibTeX::EncoderXML::currentEncoderXML()->decode( );
/** find id */
int p = result.find( "arXiv:", 0 );
// NOTE(review): the early returns in this function leave without any visible
// setEndSearch() call; confirm the search cannot get stuck here.
if ( p < 0 ) return;
int p2 = result.find( "<", p + 2 );
if ( p2 < 0 ) return;
// 6 is the length of "arXiv:".
QString id = result.mid( p + 6, p2 - p - 6 );
/** find cite_as */
QString citeas = "";
p = result.find( "Cite&nbsp;as", 0 );
p = result.find( ">arXiv:", p );
p2 = result.find( "</", p );
if ( p >= 0 && p2 >= 0 )
// 7 is the length of ">arXiv:".
citeas = result.mid( p + 7, p2 - p - 7 );
// The entry key is the "cite as" text if present, else the identifier, else
// "arXiv" followed by the received-hits counter.
// NOTE(review): append() is given the counter directly; presumably
// QString::number( m_receivedHits ) was intended. Confirm which overload is chosen.
BibTeX::Entry * entry = new BibTeX::Entry( BibTeX::Entry::etMisc, citeas.isEmpty() ? ( id.isEmpty() ? QString( "arXiv" ).append( m_receivedHits ) : id ) : citeas );
/** find abstract */
p = result.find( "Abstract:</span>", 0 );
// NOTE(review): returning from here on happens after entry was allocated
// above; no delete is visible, which looks like a leak. Confirm.
if ( p < 0 ) return;
p2 = result.find( "</blockq", p + 14 );
if ( p2 < 0 ) return;
// 16 is the length of "Abstract:</span>". Runs of whitespace are collapsed
// to one blank, leading and trailing whitespace is removed, then markup tags
// are stripped.
QString abstract = result.mid( p + 16, p2 - p - 16 ).replace( QRegExp( "\\s+" ), " " ).replace( QRegExp( "^\\s+|\\s+$" ), "" ).replace( QRegExp( "<[^>]+>" ), "" );
BibTeX::EntryField * field = new BibTeX::EntryField( BibTeX::EntryField::ftAbstract );
entry->addField( field );
field->setValue( new BibTeX::Value( abstract ) );
/** find authors */
BibTeX::PersonContainer *personContainer = new BibTeX::PersonContainer( Settings::self()->editing_FirstNameFirst );
p = -1;
// Each "/au:" occurrence marks an author link; the name is the text between
// the next ">" and the following "<".
while (( p = result.find( "/au:", p + 1 ) ) > 0 )
p = result.find( ">", p + 1 );
p2 = result.find( "<", p + 1 );
QString author = result.mid( p + 1, p2 - p - 1 );
personContainer->persons.append( new BibTeX::Person( author ) );
// NOTE(review): an else presumably separates the delete from the field
// creation below; as written the container would be deleted and then
// stored. Confirm against upstream.
if ( personContainer->persons.isEmpty() )
delete personContainer;
BibTeX::EntryField * field = new BibTeX::EntryField( BibTeX::EntryField::ftAuthor );
entry->addField( field );
BibTeX::Value *value = new BibTeX::Value();
value->items.append( personContainer );
field->setValue( value );
/** find title */
p = result.find( "Title:</span>", 0 );
p2 = result.find( "<", p + 10 );
if ( p >= 0 && p2 >= 0 )
// 13 is the length of "Title:</span>".
QString title = result.mid( p + 13, p2 - p - 13 ).replace( QRegExp( "\\s+" ), " " ).replace( QRegExp( "^\\s+|\\s+$" ), "" );
BibTeX::EntryField * field = new BibTeX::EntryField( BibTeX::EntryField::ftTitle );
entry->addField( field );
field->setValue( new BibTeX::Value( title ) );
/** find month and year */
p = result.find( "Submitted on", 0 );
// Advance p to the last "last revised" occurrence, if there is any.
while (( p2 = result.find( "last revised", p + 1 ) ) >= 0 )
p = p2;
// The first four-digit run from p onwards is taken as the year.
p2 = result.find( QRegExp( "\\d\\d\\d\\d" ), p );
bool ok = FALSE;
int year = result.mid( p2, 4 ).toInt( &ok );
if ( !ok ) year = 0;
// Only values above 1000 are stored as the year.
if ( year > 1000 )
BibTeX::EntryField * field = new BibTeX::EntryField( BibTeX::EntryField::ftYear );
entry->addField( field );
field->setValue( new BibTeX::Value( QString::number( year ) ) );
// The first capitalised three-letter word from p onwards is taken as the
// month; it is lower-cased and stored as a macro key.
p2 = result.find( QRegExp( "\\b[A-Z][a-z]{2}\\b" ), p );
if ( p2 >= 0 )
QString month = result.mid( p2, 3 ).lower();
BibTeX::EntryField * field = new BibTeX::EntryField( BibTeX::EntryField::ftMonth );
entry->addField( field );
BibTeX::Value *value = new BibTeX::Value();
value->items.append( new BibTeX::MacroKey( month ) );
field->setValue( value );
/** find DOI */
// NOTE(review): the search string is empty here; the DOI link prefix was
// presumably lost in extraction and must be restored from upstream.
p = result.find( "", 0 );
p2 = result.find( "\"", p + 1 );
if ( p >= 0 && p2 >= 0 )
QString doi = result.mid( p, p2 - p );
BibTeX::EntryField * field = new BibTeX::EntryField( BibTeX::EntryField::ftDoi );
entry->addField( field );
field->setValue( new BibTeX::Value( doi ) );
/** find keywords */
p = result.find( "<td class=\"tablecell subjects\">", 0 );
p2 = result.find( "</td>", p + 1 );
if ( p >= 0 && p2 >= 0 )
// 31 is the length of the opening cell tag searched for above; span tags
// inside the cell are removed.
QString keywords = result.mid( p + 31, p2 - p - 31 ).replace( QRegExp( "</?span[^>]*>" ), "" );
BibTeX::EntryField * field = new BibTeX::EntryField( BibTeX::EntryField::ftKeywords );
entry->addField( field );
BibTeX::Value *value = new BibTeX::Value();
value->items.append( new BibTeX::KeywordContainer( keywords ) );
field->setValue( value );
/** find ACM classes */
p = result.find( "<td class=\"tablecell acm-classes\">", 0 );
p2 = result.find( "</td>", p + 1 );
if ( p >= 0 && p2 >= 0 )
// 34 is the length of the opening cell tag searched for above.
QString acmclasses = result.mid( p + 34, p2 - p - 34 );
BibTeX::EntryField * field = new BibTeX::EntryField( "acm-classes" );
entry->addField( field );
field->setValue( new BibTeX::Value( acmclasses ) );
/** find versions */
// Looks for the markers ">[v1]<" up to ">[v19]<" and stops at the first one
// that is absent.
for ( int v = 1; !m_aborted && v < 20; ++v )
p = result.find( QString( ">[v%1]<" ).arg( v ), 0 );
if ( p < 0 ) break;
int p3 = result.findRev( "href=\"", p );
// Accept the href only if it starts less than 40 characters before the marker.
if ( p3 >= 0 && p3 > p - 40 )
p2 = result.find( "\">", p3 );
if ( p2 >= 0 )
// 6 is the length of href=" (including the quote).
QString url = result.mid( p3 + 6, p2 - p3 - 6 );
BibTeX::EntryField * field = new BibTeX::EntryField( QString( "v%1url" ).arg( v ) );
entry->addField( field );
// NOTE(review): the prefix string is empty; a host prefix for the
// link was presumably lost in extraction.
field->setValue( new BibTeX::Value( QString( "" ).append( url ) ) );
p = result.find( "</b>", p + 1 );
p2 = result.find( "<br", p + 1 );
if ( p >= 0 && p2 >= 0 )
// Skips the four characters of the closing bold tag plus one more character.
QString version = result.mid( p + 5, p2 - p - 5 );
BibTeX::EntryField * field = new BibTeX::EntryField( QString( "v%1descr" ).arg( v ) );
entry->addField( field );
field->setValue( new BibTeX::Value( version ) );
/** find tech report reference */
p = result.find( "<td class=\"tablecell report-number\">", 0 );
p2 = result.find( "</td>", p + 1 );
if ( p >= 0 && p2 >= 0 )
// 36 is the length of the opening cell tag searched for above.
QString techRepNr = result.mid( p + 36, p2 - p - 36 );
BibTeX::EntryField * field = new BibTeX::EntryField( BibTeX::EntryField::ftNumber );
entry->addField( field );
field->setValue( new BibTeX::Value( techRepNr ) );
// A report number turns the entry into a technical report.
entry->setEntryType( BibTeX::Entry::etTechReport );
/** find journal reference */
p = result.find( "<td class=\"tablecell jref\">", 0 );
p2 = result.find( "</td>", p + 1 );
if ( p >= 0 && p2 >= 0 )
// 27 is the length of the opening cell tag searched for above.
QString jref = result.mid( p + 27, p2 - p - 27 );
jref.replace( "\n", " " );
QString jTitle = "";
QString jVol = "";
QString jNo = "";
QString jYear = "";
QString jPage1 = "";
QString jPage2 = "";
// NOTE(review): every condition of the form "if ( jref ) == 0 )" below is
// not valid C++ as written; presumably each one applied the pattern whose
// captures are read in its branch to jref. Confirm against upstream.
// m_jourRef1( "^([a-zA-Z. ]+[a-zA-Z.])\\s*(\\d+)\\s+\\((\\d{4})\\)\\s+([0-9A-Z]+)(-([0-9A-Z]+))?$" )
if ( jref ) == 0 )
jTitle = m_jourRef1.cap( 1 );
jVol = m_jourRef1.cap( 2 );
jYear = m_jourRef1.cap( 3 );
jPage1 = m_jourRef1.cap( 4 );
jPage2 = m_jourRef1.cap( 6 );
// m_jourRef2( "^([a-zA-Z. ]+[a-zA-Z.]),\\s+Vol\\.?\\s+(\\d+)[,]?\\s+No\\.?\\s+(\\d+)\\s+\\((\\d{4})\\)[,]?\\s+(pp\\.\\s+)?(\\d+)(-(\\d+))?$" )
else if ( jref ) == 0 )
jTitle = m_jourRef2.cap( 1 );
jVol = m_jourRef2.cap( 2 );
jNo = m_jourRef2.cap( 3 );
jYear = m_jourRef2.cap( 4 );
jPage1 = m_jourRef2.cap( 6 );
jPage2 = m_jourRef2.cap( 8 );
// m_jourRef3( "^([a-zA-Z. ]+),\\s+volume\\s+(\\d+),\\s+number\\s+(\\d+),\\s+pp\\.\\s+(\\d+)(-(\\d+))?,\\s+(\\d{4})$" )
else if ( jref ) == 0 )
jTitle = m_jourRef3.cap( 1 );
jVol = m_jourRef3.cap( 2 );
jNo = m_jourRef3.cap( 3 );
jPage1 = m_jourRef3.cap( 4 );
jPage2 = m_jourRef3.cap( 6 );
jYear = m_jourRef3.cap( 7 );
// m_jourRef4( "^([a-zA-Z. ()]+)[,]?\\s*(\\d+)(\\((\\d+)\\))?:\\s*(\\d+)(\\s*-\\s*(\\d+))?(,\\s*(\\d{4})|\\s+\\((\\d{4})\\))?$" )
else if ( jref ) == 0 )
jTitle = m_jourRef4.cap( 1 );
jVol = m_jourRef4.cap( 2 );
jNo = m_jourRef4.cap( 4 );
jPage1 = m_jourRef4.cap( 5 );
jPage2 = m_jourRef4.cap( 7 );
// Groups 9 and 10 are the two alternative year positions of the pattern,
// so at most one of them is non-empty and the concatenation yields the year.
jYear = m_jourRef4.cap( 9 ).append( m_jourRef4.cap( 10 ) );
// m_jourRef5("^([a-zA-Z. ]+)\\s+(vol\\.\\s+)?(\\d+),\\s+(\\d+)(\\([A-Z]+\\))?\\s+\\((\\d{4})\\)[.]?$")
else if ( jref ) == 0 )
jTitle = m_jourRef5.cap( 1 );
jVol = m_jourRef5.cap( 3 );
jPage1 = m_jourRef5.cap( 4 );
jYear = m_jourRef5.cap( 6 );
// m_jourRef6("^([a-zA-Z. ]+),\\s+(\\d+)\\((\\d+)\\)\\s+(\\(([A-Za-z]+\\s+)?(\\d{4})\\))?\\s+(\\d+)(-(\\d+))?$")
else if ( jref ) == 0 )
jTitle = m_jourRef6.cap( 1 );
jVol = m_jourRef6.cap( 2 );
jNo = m_jourRef6.cap( 3 );
jYear = m_jourRef6.cap( 6 );
jPage1 = m_jourRef6.cap( 7 );
jPage2 = m_jourRef6.cap( 9 );
// NOTE(review): the three checks below use the generic title, year and page
// patterns; presumably they form the final else branch that is used when
// none of the six specific patterns applied. Confirm against upstream.
if ( jref ) == 0 )
jTitle = m_reJour.cap( 1 );
if ( jref ) == 0 )
jYear = m_reYear.cap( 1 );
if ( jref ) > -1 )
jPage1 = m_rePages.cap( 1 );
jPage2 = m_rePages.cap( 2 );
// Each non-empty journal value replaces a field of the same type that may
// already be present on the entry.
if ( !jTitle.isEmpty() )
entry->deleteField( BibTeX::EntryField::ftJournal );
BibTeX::EntryField * field = new BibTeX::EntryField( BibTeX::EntryField::ftJournal );
entry->addField( field );
field->setValue( new BibTeX::Value( jTitle ) );
// A journal title turns the entry into an article.
entry->setEntryType( BibTeX::Entry::etArticle );
if ( !jVol.isEmpty() )
entry->deleteField( BibTeX::EntryField::ftVolume );
BibTeX::EntryField * field = new BibTeX::EntryField( BibTeX::EntryField::ftVolume );
entry->addField( field );
field->setValue( new BibTeX::Value( jVol ) );
if ( !jNo.isEmpty() )
entry->deleteField( BibTeX::EntryField::ftNumber );
BibTeX::EntryField * field = new BibTeX::EntryField( BibTeX::EntryField::ftNumber );
entry->addField( field );
field->setValue( new BibTeX::Value( jNo ) );
if ( !jYear.isEmpty() )
// The journal year replaces the submission year; the submission month is
// removed together with it.
entry->deleteField( BibTeX::EntryField::ftYear );
entry->deleteField( BibTeX::EntryField::ftMonth );
BibTeX::EntryField * field = new BibTeX::EntryField( BibTeX::EntryField::ftYear );
entry->addField( field );
field->setValue( new BibTeX::Value( jYear ) );
if ( !jPage1.isEmpty() )
entry->deleteField( BibTeX::EntryField::ftPages );
BibTeX::EntryField * field = new BibTeX::EntryField( BibTeX::EntryField::ftPages );
entry->addField( field );
// Pages are written as "first--last", or only "first" when no end page is known.
QString text = jPage1;
if ( !jPage2.isEmpty() ) text.append( "--" ).append( jPage2 );
field->setValue( new BibTeX::Value( text ) );
// A mention of a PhD thesis anywhere in the page changes the entry type.
// NOTE(review): FALSE is presumably the pattern's case-sensitivity flag; confirm.
if ( result.find( QRegExp( "Ph\\.?D\\.? Thesis", FALSE ), 0 ) >= 0 )
entry->setEntryType( BibTeX::Entry::etPhDThesis );
// NOTE(review): both URL templates below are empty strings, so arg( id ) has
// no placeholder to fill; the abstract and PDF URL templates were presumably
// lost in extraction and must be restored from upstream.
field = new BibTeX::EntryField( BibTeX::EntryField::ftURL );
entry->addField( field );
field->setValue( new BibTeX::Value( QString( "" ).arg( id ) ) );
field = new BibTeX::EntryField( "pdf" );
entry->addField( field );
field->setValue( new BibTeX::Value( QString( "" ).arg( id ) ) );
emit foundEntry( entry, false );
// NOTE(review): no increment of m_receivedHits is visible before this
// comparison; presumably lost in extraction.
if ( m_totalHits == m_receivedHits )
setEndSearch( WebQuery::statusSuccess );
// More abstracts are queued: request the next one. These three statements
// mirror fetchFromAbstract() except that m_aborted is not reset.
else if ( !m_urls.isEmpty() )
KURL url = m_urls.first();
m_urls.remove( url );
KIO::Job *job = KIO::storedGet( url, FALSE, FALSE );
connect( job, SIGNAL( result( KIO::Job * ) ), this, SLOT( unlockJob( KIO::Job * ) ) );
connect( job, SIGNAL( result( KIO::Job * ) ), this, SLOT( arXivAbstractResult( KIO::Job * ) ) );
// NOTE(review): presumably the else branch of the chain above (nothing left
// to fetch), followed by the else branch of the outer error/abort check.
setEndSearch( WebQuery::statusSuccess );
setEndSearch( WebQuery::statusError );
#include "webqueryarxiv.moc"