You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kbibtex/src/webquerypubmed.cpp

536 lines
21 KiB

/***************************************************************************
* Copyright (C) 2004-2009 by Thomas Fischer *
* fischer@unix-ag.uni-kl.de *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#include <ntqwidget.h>
#include <ntqdom.h>
#include <ntqapplication.h>
#include <ntqstringlist.h>
#include <ntqbuffer.h>
#include <ntqcstring.h>
#include <ntqregexp.h>
#include <ntqspinbox.h>
#include <klineedit.h>
#include <kdialog.h>
#include <tdelocale.h>
#include <kurl.h>
#include <tdemessagebox.h>
#include <tdeio/netaccess.h>
#include <entryfield.h>
#include <value.h>
#include <settings.h>
#include "webquerypubmed.h"
namespace KBibTeX
{
/**
 * Sets up the PubMed query widget: calls init() and then puts the query
 * text stored in the settings under the key "PubMed" into the line edit.
 */
WebQueryPubMedWidget::WebQueryPubMedWidget( TQWidget *parent, const char *name )
        : WebQueryWidget( parent, name )
{
    init();

    TQString storedQuery = Settings::self()->getWebQueryDefault( "PubMed" );
    // Replace a null string by an empty one before it is shown
    if ( storedQuery == TQString::null )
        storedQuery = "";

    lineEditQuery->setText( storedQuery );
    slotTextChanged( storedQuery, true );
}
/**
 * Creates the PubMed query object together with the widget used to enter
 * the search; the widget is created with the same parent.
 */
WebQueryPubMed::WebQueryPubMed( TQWidget *parent )
        : WebQuery( parent )
{
    m_widget = new WebQueryPubMedWidget( parent );
}
/** Deletes the query widget that was allocated in the constructor. */
WebQueryPubMed::~WebQueryPubMed()
{
    delete m_widget;
}
/** @return the translated name of this search source. */
TQString WebQueryPubMed::title()
{
    const TQString caption = i18n( "NCBI (PubMed)" );
    return caption;
}
/** @return the translated label for NCBI's disclaimer and copyright notice. */
TQString WebQueryPubMed::disclaimer()
{
    const TQString label = i18n( "NCBI's Disclaimer and Copyright" );
    return label;
}
/** @return the address of NCBI's disclaimer page. */
TQString WebQueryPubMed::disclaimerURL()
{
    return TQString( "http://eutils.ncbi.nlm.nih.gov/About/disclaimer.html" );
}
/** @return the query widget created in the constructor (still owned by this object). */
WebQueryWidget *WebQueryPubMed::widget()
{
    WebQueryWidget *queryWidget = m_widget;
    return queryWidget;
}
void WebQueryPubMed::query()
{
WebQuery::query();
Settings *settings = Settings::self();
settings->setWebQueryDefault( "PubMed", m_widget->lineEditQuery->text() );
setNumStages( 2 );
int numberOfResults = m_widget->spinBoxMaxHits->value();
TQString searchTerm = m_widget->lineEditQuery->text().stripWhiteSpace().replace( '$', "" );
if ( searchTerm.isEmpty() )
{
setEndSearch( WebQuery::statusInvalidQuery );
return;
}
searchTerm = searchTerm.replace( "%", "%25" ).replace( "+", "%2B" ).replace( " ", "%20" ).replace( "#", "%23" ).replace( "&", "%26" ).replace( "?", "%3F" );
KURL url = KURL( TQString( "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=%2&retmax=%1&tool=KBibTeX&email=kbibtex@unix-ag.uni-kl.de" ).arg( numberOfResults ).arg( searchTerm ) );
TQString data = downloadHTML( url );
if ( data != TQString::null && !m_aborted )
{
TQBuffer buffer;
buffer.open( IO_WriteOnly );
TQTextStream ts( &buffer );
ts.setEncoding( TQTextStream::UnicodeUTF8 );
ts << data << endl;
buffer.close();
buffer.open( IO_ReadOnly );
TQValueList<int> intList;
TQXmlInputSource inputSource( &buffer );
TQXmlSimpleReader reader;
WebQueryPubMedStructureParserQuery handler( &intList );
reader.setContentHandler( &handler );
reader.parse( &inputSource );
buffer.close();
TQString ids;
TQValueList<int>::iterator it = intList.begin();
if ( it != intList.end() )
{
ids.append( TQString::number( *it ) );
++it;
for ( ; it != intList.end(); ++it )
{
ids.append( "," );
ids.append( TQString::number( *it ) );
}
}
url = KURL( TQString( "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id=%1&tool=KBibTeX&email=kbibtex@unix-ag.uni-kl.de" ).arg( ids ) );
data = downloadHTML( url );
if ( data != TQString::null && !m_aborted )
{
buffer.open( IO_WriteOnly );
TQTextStream ts( &buffer );
ts.setEncoding( TQTextStream::UnicodeUTF8 );
ts << data << endl;
buffer.close();
buffer.open( IO_ReadOnly );
TQDomDocument doc( "efetch'ed" );
doc.setContent( &buffer );
TQDomElement docElem = doc.documentElement();
WebQueryPubMedResultParser resultParser;
connect( &resultParser, SIGNAL( foundEntry( BibTeX::Entry*, bool ) ), this, SIGNAL( foundEntry( BibTeX::Entry*, bool ) ) );
resultParser.parse( docElem );
buffer.close();
setEndSearch( WebQuery::statusSuccess );
}
else if ( !m_aborted )
{
TQString message = TDEIO::NetAccess::lastErrorString();
message.prepend( TQString( i18n( "Querying database '%1' failed." ) ).arg( title() ) );
KMessageBox::error( m_parent, message );
setEndSearch( WebQuery::statusError );
}
else
setEndSearch( WebQuery::statusAborted );
}
else if ( !m_aborted )
{
TQString message = TDEIO::NetAccess::lastErrorString();
if ( message.isEmpty() )
message.prepend( '\n' );
message.prepend( TQString( i18n( "Querying database '%1' failed." ) ).arg( title() ) );
KMessageBox::error( m_parent, message );
setEndSearch( WebQuery::statusError );
}
else
setEndSearch( WebQuery::statusAborted );
}
/**
 * Creates a SAX handler that collects the ids found in an esearch reply.
 *
 * @param intList list receiving the ids; it is emptied here. The pointer is
 *                stored, not copied. A null pointer is tolerated, as it is
 *                in endElement(); in that case no ids are recorded.
 */
WebQueryPubMedStructureParserQuery::WebQueryPubMedStructureParserQuery( TQValueList<int> *intList )
        : TQXmlDefaultHandler(), m_intList( intList )
{
    if ( m_intList != NULL )
        m_intList->clear();
}
/** Nothing to release: the id list passed to the constructor is not deleted here. */
WebQueryPubMedStructureParserQuery::~WebQueryPubMedStructureParserQuery()
{
    // intentionally empty
}
bool WebQueryPubMedStructureParserQuery::startElement( const TQString & /*namespaceURI*/, const TQString & /*localName*/, const TQString & /*qName*/, const TQXmlAttributes & /*atts*/ )
{
concatString = TQString();
return TRUE;
}
/**
 * Called at every closing tag. When an "Id" element ends, the gathered
 * character data is converted to an integer and, if it is a positive
 * number, appended to the id list.
 *
 * @param qName qualified name of the element that ends
 * @return always TRUE
 */
bool WebQueryPubMedStructureParserQuery::endElement( const TQString & /*namespaceURI*/, const TQString & /*localName*/, const TQString & qName )
{
    if ( qName != "Id" )
        return TRUE;

    bool isNumber = false;
    const int id = concatString.toInt( &isNumber );
    if ( isNumber && id > 0 && m_intList != NULL )
        m_intList->append( id );

    return TRUE;
}
/**
 * Adds a chunk of character data to the text gathered for the current element.
 *
 * @param ch the characters reported by the XML reader
 * @return always TRUE
 */
bool WebQueryPubMedStructureParserQuery::characters( const TQString & ch )
{
    concatString += ch;
    return TRUE;
}
/** Creates a result parser; there is no state to initialise. */
WebQueryPubMedResultParser::WebQueryPubMedResultParser()
        : TQObject()
{
    // intentionally empty
}
void WebQueryPubMedResultParser::parse( const TQDomElement& rootElement )
{
if ( rootElement.tagName() == "PubmedArticleSet" )
for ( TQDomNode n = rootElement.firstChild(); !n.isNull(); n = n.nextSibling() )
{
TQDomElement e = n.toElement();
if ( !e.isNull() && e.tagName() == "PubmedArticle" )
{
BibTeX::Entry * entry = new BibTeX::Entry( BibTeX::Entry::etMisc, "PubMed" );
parsePubmedArticle( e, entry );
emit foundEntry( entry, false );
}
}
}
/** Nothing to clean up. */
WebQueryPubMedResultParser::~WebQueryPubMedResultParser()
{
    // intentionally empty
}
void WebQueryPubMedResultParser::parsePubmedArticle( const TQDomElement& element, BibTeX::Entry *entry )
{
for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() )
{
TQDomElement e = n.toElement();
if ( !e.isNull() && e.tagName() == "MedlineCitation" )
parseMedlineCitation( e, entry );
}
}
void WebQueryPubMedResultParser::parseMedlineCitation( const TQDomElement& element, BibTeX::Entry *entry )
{
for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() )
{
TQDomElement e = n.toElement();
if ( !e.isNull() )
{
if ( e.tagName() == "PMID" )
{
entry->setId( TQString( "PubMed_%1" ).arg( e.text() ) );
/** add url to pubmed website */
BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftURL );
if ( field == NULL )
{
field = new BibTeX::EntryField( BibTeX::EntryField::ftURL );
entry->addField( field );
}
field->setValue( new BibTeX::Value( TQString( "http://www.ncbi.nlm.nih.gov/pubmed/" ).append( e.text() ) ) );
}
else if ( e.tagName() == "Article" )
parseArticle( e, entry );
else if ( e.tagName() == "MedlineJournalInfo" )
{
for ( TQDomNode n2 = e.firstChild(); !n2.isNull(); n2 = n2.nextSibling() )
{
TQDomElement e2 = n2.toElement();
if ( e2.tagName() == "MedlineTA" )
{
BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftJournal );
if ( field == NULL )
{
field = new BibTeX::EntryField( BibTeX::EntryField::ftJournal );
entry->addField( field );
}
field->setValue( new BibTeX::Value( e2.text() ) );
}
}
}
}
}
}
void WebQueryPubMedResultParser::parseArticle( const TQDomElement& element, BibTeX::Entry *entry )
{
for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() )
{
TQDomElement e = n.toElement();
if ( e.tagName() == "Journal" )
{
parseJournal( e, entry );
entry->setEntryType( BibTeX::Entry::etArticle );
}
else if ( e.tagName() == "ArticleTitle" )
{
BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftTitle );
if ( field == NULL )
{
field = new BibTeX::EntryField( BibTeX::EntryField::ftTitle );
entry->addField( field );
}
field->setValue( new BibTeX::Value( e.text() ) );
}
else if ( e.tagName() == "Pagination" )
{
TQDomElement medlinePgn = e.firstChild().toElement(); // may fail?
if ( !medlinePgn.text().isEmpty() )
{
BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftPages );
if ( field == NULL )
{
field = new BibTeX::EntryField( BibTeX::EntryField::ftPages );
entry->addField( field );
}
field->setValue( new BibTeX::Value( medlinePgn.text() ) );
}
}
else if ( e.tagName() == "Abstract" )
{
TQDomElement abstractText = e.firstChild().toElement();
BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftAbstract );
if ( field == NULL )
{
field = new BibTeX::EntryField( BibTeX::EntryField::ftAbstract );
entry->addField( field );
}
field->setValue( new BibTeX::Value( abstractText.text() ) );
}
else if ( e.tagName() == "Affiliation" )
{
BibTeX::EntryField * field = entry->getField( "affiliation" );
if ( field == NULL )
{
field = new BibTeX::EntryField( "affiliation" );
entry->addField( field );
}
field->setValue( new BibTeX::Value( e.text() ) );
}
else if ( e.tagName() == "AuthorList" )
parseAuthorList( e, entry );
}
}
void WebQueryPubMedResultParser::parseJournal( const TQDomElement& element, BibTeX::Entry *entry )
{
for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() )
{
TQDomElement e = n.toElement();
if ( e.tagName() == "ISSN" )
{
BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftISSN );
if ( field == NULL )
{
field = new BibTeX::EntryField( BibTeX::EntryField::ftISSN );
entry->addField( field );
}
field->setValue( new BibTeX::Value( e.text() ) );
}
else if ( e.tagName() == "JournalIssue" )
parseJournalIssue( e, entry );
else if ( e.tagName() == "Title" )
{
BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftJournal );
if ( field == NULL )
{
field = new BibTeX::EntryField( BibTeX::EntryField::ftJournal );
entry->addField( field );
}
field->setValue( new BibTeX::Value( e.text() ) );
}
}
}
void WebQueryPubMedResultParser::parseJournalIssue( const TQDomElement& element, BibTeX::Entry *entry )
{
for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() )
{
TQDomElement e = n.toElement();
if ( e.tagName() == "Volume" )
{
BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftVolume );
if ( field == NULL )
{
field = new BibTeX::EntryField( BibTeX::EntryField::ftVolume );
entry->addField( field );
}
field->setValue( new BibTeX::Value( e.text() ) );
}
else if ( e.tagName() == "Issue" )
{
BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftNumber );
if ( field == NULL )
{
field = new BibTeX::EntryField( BibTeX::EntryField::ftNumber );
entry->addField( field );
}
field->setValue( new BibTeX::Value( e.text() ) );
}
else if ( e.tagName() == "PubDate" )
parsePubDate( e, entry );
}
}
void WebQueryPubMedResultParser::parsePubDate( const TQDomElement& element, BibTeX::Entry *entry )
{
for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() )
{
TQDomElement e = n.toElement();
if ( e.tagName() == "Year" )
{
BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftYear );
if ( field == NULL )
{
field = new BibTeX::EntryField( BibTeX::EntryField::ftYear );
entry->addField( field );
}
field->setValue( new BibTeX::Value( e.text() ) );
}
else if ( e.tagName() == "Month" )
{
TQString month = e.text().lower();
BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftMonth );
if ( field == NULL )
{
field = new BibTeX::EntryField( BibTeX::EntryField::ftMonth );
entry->addField( field );
}
BibTeX::Value *value = new BibTeX::Value();
value->items.append( new BibTeX::MacroKey( month ) );
field->setValue( value );
}
else if ( e.tagName() == "MedlineDate" )
{
TQStringList frags = TQStringList::split( TQRegExp( "\\s+" ), e.text() );
for ( TQStringList::Iterator it = frags.begin(); it != frags.end(); ++it )
{
bool ok;
int num = ( *it ).toInt( &ok );
if ( ok && num > 1000 && num < 3000 )
{
BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftYear );
if ( field == NULL )
{
field = new BibTeX::EntryField( BibTeX::EntryField::ftYear );
entry->addField( field );
}
BibTeX::Value *value = new BibTeX::Value();
value->items.append( new BibTeX::MacroKey( TQString::number( num ) ) );
field->setValue( value );
}
else if ( !ok && ( *it ).length() == 3 )
{
BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftMonth );
if ( field == NULL )
{
field = new BibTeX::EntryField( BibTeX::EntryField::ftMonth );
entry->addField( field );
}
BibTeX::Value *value = new BibTeX::Value();
value->items.append( new BibTeX::MacroKey(( *it ).lower() ) );
field->setValue( value );
}
}
}
}
}
/**
 * Handles one "AuthorList" element and stores its authors in the entry's
 * author field (created if missing, overwritten otherwise).
 *
 * Nothing is done unless the list's "CompleteYN" attribute is "Y" (the
 * default if the attribute is absent). Only "Author" children whose
 * "ValidYN" attribute is "Y" (again the default) are used. The last name
 * comes from "LastName" or "CollectiveName", the first name from
 * "FirstName" or "ForeName"; an author with a first name but no last name
 * element gets the last name "UNSET". Authors without any name text are
 * skipped.
 *
 * @param element the "AuthorList" element
 * @param entry   entry that receives the author field
 */
void WebQueryPubMedResultParser::parseAuthorList( const TQDomElement& element, BibTeX::Entry *entry )
{
    if ( element.attribute( "CompleteYN", "Y" ) != "Y" )
        return;

    // Names are collected as "first|last" (or just "last") and split again below
    TQStringList authorList;
    for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() )
    {
        TQDomElement e = n.toElement();
        if ( e.tagName() == "Author" && e.attribute( "ValidYN", "Y" ) == "Y" )
        {
            TQString lastName = TQString::null, firstName = TQString::null;
            for ( TQDomNode n2 = e.firstChild(); !n2.isNull(); n2 = n2.nextSibling() )
            {
                TQDomElement e2 = n2.toElement();
                if ( e2.tagName() == "LastName" )
                    lastName = e2.text();
                else if ( e2.tagName() == "CollectiveName" )
                    lastName = e2.text();
                else if ( e2.tagName() == "FirstName" || e2.tagName() == "ForeName" )
                    firstName = e2.text();
            }

            TQString name = lastName;
            if ( !firstName.isEmpty() )
            {
                if ( name.isNull() ) name = "UNSET";
                name.prepend( "|" ).prepend( firstName );
            }

            // Skip names without any text: an empty name element yields an
            // empty (but not null) string that must not reach the split below
            if ( !name.isEmpty() )
                authorList.append( name );
        }
    }

    BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftAuthor );
    if ( field == NULL )
    {
        field = new BibTeX::EntryField( BibTeX::EntryField::ftAuthor );
        entry->addField( field );
    }

    BibTeX::Value *value = new BibTeX::Value();
    Settings *settings = Settings::self();
    BibTeX::PersonContainer *personContainer = new BibTeX::PersonContainer( settings->editing_FirstNameFirst );
    value->items.append( personContainer );

    for ( TQStringList::Iterator sli = authorList.begin(); sli != authorList.end(); ++sli )
    {
        TQStringList nameParts = TQStringList::split( '|', *sli );
        // Guard the index computation below: count() is unsigned, so
        // "count() - 1" on an empty list would wrap around
        if ( nameParts.isEmpty() )
            continue;

        TQString firstName = nameParts.count() > 1 ? nameParts[0] : "";
        TQString lastName = nameParts[nameParts.count() - 1];
        personContainer->persons.append( new BibTeX::Person( firstName, lastName, settings->editing_FirstNameFirst ) );
    }

    field->setValue( value );
}
}
#include "webquerypubmed.moc"