|
|
|
/*******************************************************************************
|
|
|
|
**
|
|
|
|
** Filename : levenshteintable.cpp
|
|
|
|
** Created on : 08 november, 2003
|
|
|
|
** Copyright : (c) 2003 Otto Bruggeman
|
|
|
|
** Email : bruggie@home.nl
|
|
|
|
**
|
|
|
|
*******************************************************************************/
|
|
|
|
|
|
|
|
/*******************************************************************************
|
|
|
|
**
|
|
|
|
** This program is free software; you can redistribute it and/or modify
|
|
|
|
** it under the terms of the GNU General Public License as published by
|
|
|
|
** the Free Software Foundation; either version 2 of the License, or
|
|
|
|
** (at your option) any later version.
|
|
|
|
**
|
|
|
|
*******************************************************************************/
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
|
|
|
|
#include <tqstring.h>
|
|
|
|
|
|
|
|
#include <kdebug.h>
|
|
|
|
#include <tdeglobal.h>
|
|
|
|
|
|
|
|
#include "levenshteintable.h"
|
|
|
|
|
|
|
|
#include "difference.h"
|
|
|
|
|
|
|
|
using namespace Diff2;
|
|
|
|
|
|
|
|
LevenshteinTable::LevenshteinTable()
|
|
|
|
: m_width( 256 ),
|
|
|
|
m_height( 256 ),
|
|
|
|
m_size( m_height * m_width ),
|
|
|
|
m_table( new unsigned int[ m_size ] ),
|
|
|
|
m_source( 0 ),
|
|
|
|
m_destination( 0 )
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
LevenshteinTable::LevenshteinTable( unsigned int width, unsigned int height )
|
|
|
|
: m_width( width ),
|
|
|
|
m_height( height ),
|
|
|
|
m_size( m_width * m_height ),
|
|
|
|
m_table( new unsigned int[ m_size ] ),
|
|
|
|
m_source( 0 ),
|
|
|
|
m_destination( 0 )
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
LevenshteinTable::~LevenshteinTable()
|
|
|
|
{
|
|
|
|
delete[] m_table;
|
|
|
|
m_source = 0;
|
|
|
|
m_destination = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int LevenshteinTable::getContent( unsigned int posX, unsigned int posY ) const
|
|
|
|
{
|
|
|
|
// kdDebug(8101) << "Width = " << m_width << ", height = " << m_height << ", posX = " << posX << ", posY = " << posY << endl;
|
|
|
|
return m_table[ posY * m_width + posX ];
|
|
|
|
}
|
|
|
|
|
|
|
|
int LevenshteinTable::setContent( unsigned int posX, unsigned int posY, int value )
|
|
|
|
{
|
|
|
|
m_table[ posY * m_width + posX ] = value;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool LevenshteinTable::setSize( unsigned int width, unsigned int height )
|
|
|
|
{
|
|
|
|
// Set a limit of 16.7 million entries, will be about 64 MB of ram, that should be plenty
|
|
|
|
if ( ( ( width ) * ( height ) ) > ( 256 * 256 * 256 ) )
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if ( ( ( width ) * ( height ) ) > m_size )
|
|
|
|
{
|
|
|
|
delete[] m_table;
|
|
|
|
|
|
|
|
m_size = width * height;
|
|
|
|
m_table = new unsigned int[ m_size ];
|
|
|
|
}
|
|
|
|
|
|
|
|
m_width = width;
|
|
|
|
m_height = height;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void LevenshteinTable::dumpLevenshteinTable()
|
|
|
|
{
|
|
|
|
for ( unsigned int i = 0; i < m_height; ++i )
|
|
|
|
{
|
|
|
|
for ( unsigned int j = 0; j < m_width; ++j )
|
|
|
|
{
|
|
|
|
std::cout.width( 3 );
|
|
|
|
std::cout << getContent( j, i );
|
|
|
|
}
|
|
|
|
std::cout << std::endl;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned int LevenshteinTable::createTable( DifferenceString* source, DifferenceString* destination )
|
|
|
|
{
|
|
|
|
m_source = source;
|
|
|
|
m_destination = destination;
|
|
|
|
|
|
|
|
TQString s = ' ' + source->string(); // Optimization, so i dont have to subtract 1 from the indexes every
|
|
|
|
TQString d = ' ' + destination->string(); // single time and add 1 to the width and height of the table
|
|
|
|
|
|
|
|
unsigned int m = s.length();
|
|
|
|
unsigned int n = d.length();
|
|
|
|
|
|
|
|
const TQChar* sq = s.unicode();
|
|
|
|
const TQChar* dq = d.unicode();
|
|
|
|
|
|
|
|
if ( m == 1 )
|
|
|
|
return --n;
|
|
|
|
|
|
|
|
if ( n == 1 )
|
|
|
|
return --m;
|
|
|
|
|
|
|
|
if ( !setSize( m, n ) )
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
unsigned int i;
|
|
|
|
unsigned int j;
|
|
|
|
|
|
|
|
// initialize first row
|
|
|
|
for ( i = 0; i < m; ++i )
|
|
|
|
setContent( i, 0, i );
|
|
|
|
// initialize first column
|
|
|
|
for ( j = 0; j < n; ++j )
|
|
|
|
setContent( 0, j, j );
|
|
|
|
|
|
|
|
int cost = 0, north = 0, west = 0, northwest = 0;
|
|
|
|
|
|
|
|
ushort si, dj;
|
|
|
|
// Optimization, calculate row wise instead of column wise, wont trash the cache so much with large strings
|
|
|
|
for ( j = 1; j < n; ++j )
|
|
|
|
{
|
|
|
|
dj = dq[ j ];
|
|
|
|
|
|
|
|
for ( i = 1; i < m; ++i )
|
|
|
|
{
|
|
|
|
si = sq[ i ];
|
|
|
|
if ( si == dj )
|
|
|
|
cost = 0;
|
|
|
|
else
|
|
|
|
cost = 1;
|
|
|
|
|
|
|
|
north = getContent( i, j-1 ) + 1;
|
|
|
|
west = getContent( i-1, j ) + 1;
|
|
|
|
northwest = getContent( i-1, j-1 ) + cost;
|
|
|
|
|
|
|
|
setContent( i, j, kMin( north, kMin( west, northwest ) ) );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return getContent( m-1, n-1 );
|
|
|
|
}
|
|
|
|
|
|
|
|
int LevenshteinTable::chooseRoute( int c1, int c2, int c3 )
|
|
|
|
{
|
|
|
|
// kdDebug(8101) << "c1 = " << c1 << ", c2 = " << c2 << ", c3 = " << c3 << endl;
|
|
|
|
// preference order: c2, c3, c1, hopefully this will work out for me
|
|
|
|
if ( c2 <= c1 && c2 <= c3 )
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
if ( c3 <= c2 && c3 <= c1 )
|
|
|
|
return 2;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void LevenshteinTable::createListsOfMarkers()
|
|
|
|
{
|
|
|
|
// std::cout << source.latin1() << std::endl;
|
|
|
|
// std::cout << destination.latin1() << std::endl;
|
|
|
|
// dumpLevenshteinTable();
|
|
|
|
|
|
|
|
unsigned int x = m_width-1;
|
|
|
|
unsigned int y = m_height-1;
|
|
|
|
|
|
|
|
Marker* c = 0;
|
|
|
|
|
|
|
|
int n, nw, w, direction, currentValue;
|
|
|
|
while ( x > 0 && y > 0 )
|
|
|
|
{
|
|
|
|
currentValue = getContent( x, y );
|
|
|
|
|
|
|
|
nw = getContent( x - 1, y - 1 );
|
|
|
|
n = getContent( x, y - 1 );
|
|
|
|
w = getContent( x - 1, y );
|
|
|
|
|
|
|
|
direction = chooseRoute( n, nw, w );
|
|
|
|
|
|
|
|
switch ( direction )
|
|
|
|
{
|
|
|
|
case 0: // north
|
|
|
|
// kdDebug(8101) << "Picking north" << endl;
|
|
|
|
// kdDebug(8101) << "Source[" << ( x - 1 ) << "] = " << TQString( source[ x-1 ] ) << ", destination[" << ( y - 1 ) << "] = " << TQString( destination[ y-1 ] ) << endl;
|
|
|
|
|
|
|
|
if ( !m_destination->markerList().isEmpty() )
|
|
|
|
c = m_destination->markerList().first();
|
|
|
|
else
|
|
|
|
c = 0;
|
|
|
|
|
|
|
|
if ( c && c->type() == Marker::End )
|
|
|
|
{
|
|
|
|
// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
|
|
|
|
if ( n == currentValue )
|
|
|
|
m_destination->prepend( new Marker( Marker::Start, y ) );
|
|
|
|
// else: the change continues, dont do anything
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
|
|
|
|
if ( n < currentValue )
|
|
|
|
m_destination->prepend( new Marker( Marker::End, y ) );
|
|
|
|
}
|
|
|
|
|
|
|
|
--y;
|
|
|
|
break;
|
|
|
|
case 1: // northwest
|
|
|
|
// kdDebug(8101) << "Picking northwest" << endl;
|
|
|
|
// kdDebug(8101) << "Source[" << ( x - 1 ) << "] = " << TQString( source[ x-1 ] ) << ", destination[" << ( y - 1 ) << "] = " << TQString( destination[ y-1 ] ) << endl;
|
|
|
|
|
|
|
|
if ( !m_destination->markerList().isEmpty() )
|
|
|
|
c = m_destination->markerList().first();
|
|
|
|
else
|
|
|
|
c = 0;
|
|
|
|
|
|
|
|
if ( c && c->type() == Marker::End )
|
|
|
|
{
|
|
|
|
// kdDebug(8101) << "End found: CurrentValue: " << currentValue << endl;
|
|
|
|
if ( nw == currentValue )
|
|
|
|
m_destination->prepend( new Marker( Marker::Start, y ) );
|
|
|
|
// else: the change continues, dont do anything
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
|
|
|
|
if ( nw < currentValue )
|
|
|
|
m_destination->prepend( new Marker( Marker::End, y ) );
|
|
|
|
}
|
|
|
|
|
|
|
|
if ( !m_source->markerList().isEmpty() )
|
|
|
|
c = m_source->markerList().first();
|
|
|
|
else
|
|
|
|
c = 0;
|
|
|
|
|
|
|
|
if ( c && c->type() == Marker::End )
|
|
|
|
{
|
|
|
|
// kdDebug(8101) << "End found: CurrentValue: " << currentValue << endl;
|
|
|
|
if ( nw == currentValue )
|
|
|
|
m_source->prepend( new Marker( Marker::Start, x ) );
|
|
|
|
// else: the change continues, dont do anything
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
|
|
|
|
if ( nw < currentValue )
|
|
|
|
m_source->prepend( new Marker( Marker::End, x ) );
|
|
|
|
}
|
|
|
|
|
|
|
|
--y;
|
|
|
|
--x;
|
|
|
|
break;
|
|
|
|
case 2: // west
|
|
|
|
// kdDebug(8101) << "Picking west" << endl;
|
|
|
|
// kdDebug(8101) << "Source[" << ( x - 1 ) << "] = " << TQString( source[ x-1 ] ) << ", destination[" << ( y - 1 ) << "] = " << TQString( destination[ y-1 ] ) << endl;
|
|
|
|
|
|
|
|
if ( !m_source->markerList().isEmpty() )
|
|
|
|
c = m_source->markerList().first();
|
|
|
|
else
|
|
|
|
c = 0;
|
|
|
|
|
|
|
|
if ( c && c->type() == Marker::End )
|
|
|
|
{
|
|
|
|
// kdDebug(8101) << "End found: CurrentValue: " << currentValue << endl;
|
|
|
|
if ( w == currentValue )
|
|
|
|
m_source->prepend( new Marker( Marker::Start, x ) );
|
|
|
|
// else: the change continues, dont do anything
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
|
|
|
|
if ( w < currentValue )
|
|
|
|
m_source->prepend( new Marker( Marker::End, x ) );
|
|
|
|
}
|
|
|
|
|
|
|
|
--x;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// kdDebug(8101) << "Source string: " << m_source->string() << endl;
|
|
|
|
// c = m_source->markerList()->first();
|
|
|
|
// TQStringList list;
|
|
|
|
// unsigned int prevValue = 0;
|
|
|
|
// for ( ; c; c = m_source->markerList()->next() )
|
|
|
|
// {
|
|
|
|
// kdDebug(8101) << "Source Marker Entry : Type: " << c->type() << ", Offset: " << c->offset() << endl;
|
|
|
|
// list.append( m_source->string().mid( prevValue, c->offset() - prevValue ) );
|
|
|
|
// prevValue = c->offset();
|
|
|
|
// }
|
|
|
|
// if ( prevValue < m_source->string().length() - 1 )
|
|
|
|
// {
|
|
|
|
// list.append( m_source->string().mid( prevValue, m_source->string().length() - prevValue ) );
|
|
|
|
// }
|
|
|
|
// kdDebug(8101) << "Source Resulting stringlist : " << list.join("\n") << endl;
|
|
|
|
|
|
|
|
// list.clear();
|
|
|
|
// prevValue = 0;
|
|
|
|
|
|
|
|
// kdDebug(8101) << "Destination string: " << m_destination->string() << endl;
|
|
|
|
// for ( ; c; c = m_destination->markerList()->next() )
|
|
|
|
// {
|
|
|
|
// kdDebug(8101) << "Destination Marker Entry : Type: " << c->type() << ", Offset: " << c->offset() << endl;
|
|
|
|
// list.append( m_destination->string().mid( prevValue, c->offset() - prevValue ) );
|
|
|
|
// prevValue = c->offset();
|
|
|
|
// }
|
|
|
|
// if ( prevValue < m_destination->string().length() - 1 )
|
|
|
|
// {
|
|
|
|
// list.append( m_destination->string().mid( prevValue, m_destination->string().length() - prevValue ) );
|
|
|
|
// }
|
|
|
|
// kdDebug(8101) << "Destination Resulting string : " << list.join("\n") << endl;
|
|
|
|
}
|
|
|
|
|