/***************************************************************************
 *   Copyright (C) 2004-2009 by Thomas Fischer                             *
 *   fischer@unix-ag.uni-kl.de                                             *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.              *
 ***************************************************************************/
|
|
|
|
#ifndef KBIBTEXFINDDUPLICATES_H
|
|
|
|
#define KBIBTEXFINDDUPLICATES_H
|
|
|
|
|
|
|
|
#include <tqobject.h>
|
|
|
|
|
|
|
|
class KProgressDialog;
|
|
|
|
class TQWidget;
|
|
|
|
|
|
|
|
namespace KBibTeX
|
|
|
|
{
|
|
|
|
|
|
|
|
/**
|
|
|
|
@author Thomas Fischer <fischer@unix-ag.uni-kl.de>
|
|
|
|
*/
|
|
|
|
class FindDuplicates : public TQObject
|
|
|
|
{
|
|
|
|
Q_OBJECT
|
|
|
|
public:
|
|
|
|
typedef TQValueList<BibTeX::Element*> DuplicateClique;
|
|
|
|
typedef TQValueList<DuplicateClique> DuplicateCliqueList;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Find duplicates in a given BibTeX file. The sensitivity parameter controls the distance between two elements where both elements are considered to be duplicates. The parent object is used as a progress dialog's parent.
|
|
|
|
* @param file
|
|
|
|
* @param sensitivity
|
|
|
|
* @param parent
|
|
|
|
* @return
|
|
|
|
*/
|
|
|
|
FindDuplicates( DuplicateCliqueList &result, unsigned int sensitivity, BibTeX::File *file, TQWidget *parent );
|
|
|
|
|
|
|
|
~FindDuplicates();
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Maximum sensitivity
|
|
|
|
*/
|
|
|
|
static const unsigned int maxDistance;
|
|
|
|
|
|
|
|
protected:
|
|
|
|
void determineDistances( BibTeX::File *file, unsigned int *distVector, TQMap<BibTeX::Element*, int> &mapElementToIndex, KProgressDialog *progDlg );
|
|
|
|
void buildClique( DuplicateCliqueList &result, BibTeX::File *file, unsigned int *distVector, TQMap<BibTeX::Element*, int> &mapElementToIndex, unsigned int sensitivity );
|
|
|
|
unsigned int entryDistance( BibTeX::Entry *entryA, BibTeX::Entry *entryB );
|
|
|
|
unsigned int macroDistance( BibTeX::Macro *macroA, BibTeX::Macro *macroB );
|
|
|
|
unsigned int preambleDistance( BibTeX::Preamble *preambleA, BibTeX::Preamble *preambleB );
|
|
|
|
|
|
|
|
static TQString extractTitle( BibTeX::Entry *entry );
|
|
|
|
static TQStringList authorsLastName( BibTeX::Entry *entry );
|
|
|
|
static int extractYear( BibTeX::Entry *entry );
|
|
|
|
static TQString extractMacroKey( BibTeX::Macro *macro );
|
|
|
|
static TQString extractMacroValue( BibTeX::Macro *macro );
|
|
|
|
|
|
|
|
private:
|
|
|
|
bool m_doCancel;
|
|
|
|
|
|
|
|
double levenshteinDistance( const TQStringList &s, const TQStringList &t );
|
|
|
|
double levenshteinDistance( const TQString &s, const TQString &t );
|
|
|
|
double levenshteinDistanceWord( const TQString &s, const TQString &t );
|
|
|
|
int arrayOffset( int a, int b );
|
|
|
|
void sort( unsigned int *array, int len );
|
|
|
|
|
|
|
|
private slots:
|
|
|
|
void slotCancel();
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|