You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
kbibtex/src/webqueryciteseerx.h

142 lines
5.0 KiB

/***************************************************************************
* Copyright (C) 2008 by Jacob Kanev <j_kanev@arcor.de>, *
* Thomas Fischer <fischer@unix-ag.uni-kl.de> *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#ifndef KBIBTEXWEBQUERYCITESEERX_H
#define KBIBTEXWEBQUERYCITESEERX_H
#include <tqstring.h>
#include <tqstringlist.h>
#include <tdeio/job.h>
#include "fileimporterbibtex.h"
#include "webquery.h"
#include <deque>
class TQBuffer;
namespace KBibTeX
{
/**
 * Widget class for the CiteSeerX web query, derived from WebQueryWidget.
 * WebQueryCiteSeerX keeps a pointer to an instance of this class (m_widget).
 * Only the constructor is declared here; its definition is not part of this header.
 * @author Thomas Fischer <fischer@unix-ag.uni-kl.de>
 */
class WebQueryCiteSeerXWidget : public WebQueryWidget
{
TQ_OBJECT
public:
/// Construct the widget.
/// @param parent parent widget
/// @param name optional name, defaults to 0 (presumably the TQt object name -- confirm against WebQueryWidget)
WebQueryCiteSeerXWidget( TQWidget *parent, const char *name = 0 );
};
/// Forward declaration, needed so the member-pointer typedef below can name the class before it is defined.
class WebQueryCiteSeerX;
/// Convenience typedef for a pointer to a WebQueryCiteSeerX parsing member function (takes a const TQString reference, returns nothing).
typedef void ( WebQueryCiteSeerX::* DataParser )( const TQString & );
/// Query the CiteSeerX database.
/**
 * This class is used for querying the CiteSeerX database. CiteSeerX is still
 * beta, so this class has to be adapted as soon as the CiteSeer people change
 * their web interface.
 *
 * After the search term has been entered, CiteSeerX returns a page with 10
 * links (one for each paper) and one link to the next 10 hits. This class
 * uses a queue to schedule each reading job, and two parsing functions: one
 * for the summary page and one for each paper result. The BibTeX fields
 * abstract, title, author, year, journal, and pages are found.
 *
 * @author Jacob Kanev <j_kanev@arcor.de>
 */
class WebQueryCiteSeerX : public WebQuery
{
TQ_OBJECT
public:
/// One queued request: the URL to fetch and the member function that parses the fetched data.
struct DataRequest
{
/// Address to read from.
KURL url;
/// Parsing member function to apply to the data read from url.
DataParser parser;
};
/// Construct.
/// @param parent parent widget
WebQueryCiteSeerX( TQWidget* parent );
/// Destroy.
virtual ~WebQueryCiteSeerX();
/// Main function: start the query.
void query();
/// Return the title.
TQString title();
/// Return the disclaimer text.
TQString disclaimer();
/// Return the disclaimer URL.
TQString disclaimerURL();
/// Return the GUI element.
WebQueryWidget *widget();
protected:
/// Callback for cancelling the query.
void cancelQuery();
private slots:
/// Callback when the job is finished.
/** Reads the data from the job and hands it over to the currently set parser (m_currentParser).
@param job the finished job whose data is read */
void getData( TDEIO::Job *job );
private:
/// Parses the main page and schedules single-paper reading jobs.
/** Parses the summary page and schedules one job for each paper link, plus one job for the "Next 10" summary page.
@param data content of the summary page */
void parseSummaryPage( const TQString &data );
/// Parses single-paper pages.
/** Reads the "Abstract:" and the "@entrytype{" strings found in the HTML page.
@param data content of the paper page */
void parsePaperPage( const TQString &data );
/// Execute the next waiting job.
/** Takes the next query out of the queue, sets the appropriate parser, and schedules getData with the URL. */
void nextJob();
/// Find a single BibTeX field in an HTML page and add it to an entry.
/** Uses the first text collected by the description (a regular expression) and adds it to the entry as a field of the given type.
@param description regular expression describing the text to collect
@param data content of the HTML page to search
@param entry entry that receives the field
@param type field type under which the text is added */
void parseForSingleExpression( TQString description, const TQString &data, BibTeX::Entry *entry, BibTeX::EntryField::FieldType type );
/// The currently active parser.
DataParser m_currentParser;
/// The internet address of CiteSeerX.
TQString m_citeSeerXServer;
/// Number of hits desired by the user.
int m_desiredHits;
/// Number of hits read from summary pages.
int m_receivedHits;
/// Queue of waiting requests.
std::deque<DataRequest> m_queryQueue;
/// Pointer to the TQt dialog.
WebQueryCiteSeerXWidget *m_widget;
};
}
#endif