You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdesdk/kompare/libdiff2/levenshteintable.cpp

333 lines
8.8 KiB

/*******************************************************************************
**
** Filename : levenshteintable.cpp
** Created on : 08 november, 2003
** Copyright : (c) 2003 Otto Bruggeman
** Email : bruggie@home.nl
**
*******************************************************************************/
/*******************************************************************************
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
*******************************************************************************/
#include <iostream>
#include <qstring.h>
#include <kdebug.h>
#include <kglobal.h>
#include "levenshteintable.h"
#include "difference.h"
using namespace Diff2;
LevenshteinTable::LevenshteinTable()
: m_width( 256 ),
m_height( 256 ),
m_size( m_height * m_width ),
m_table( new unsigned int[ m_size ] ),
m_source( 0 ),
m_destination( 0 )
{
}
LevenshteinTable::LevenshteinTable( unsigned int width, unsigned int height )
: m_width( width ),
m_height( height ),
m_size( m_width * m_height ),
m_table( new unsigned int[ m_size ] ),
m_source( 0 ),
m_destination( 0 )
{
}
LevenshteinTable::~LevenshteinTable()
{
delete[] m_table;
m_source = 0;
m_destination = 0;
}
int LevenshteinTable::getContent( unsigned int posX, unsigned int posY ) const
{
// kdDebug(8101) << "Width = " << m_width << ", height = " << m_height << ", posX = " << posX << ", posY = " << posY << endl;
return m_table[ posY * m_width + posX ];
}
int LevenshteinTable::setContent( unsigned int posX, unsigned int posY, int value )
{
m_table[ posY * m_width + posX ] = value;
return 0;
}
bool LevenshteinTable::setSize( unsigned int width, unsigned int height )
{
// Set a limit of 16.7 million entries, will be about 64 MB of ram, that should be plenty
if ( ( ( width ) * ( height ) ) > ( 256 * 256 * 256 ) )
return false;
if ( ( ( width ) * ( height ) ) > m_size )
{
delete[] m_table;
m_size = width * height;
m_table = new unsigned int[ m_size ];
}
m_width = width;
m_height = height;
return true;
}
void LevenshteinTable::dumpLevenshteinTable()
{
for ( unsigned int i = 0; i < m_height; ++i )
{
for ( unsigned int j = 0; j < m_width; ++j )
{
std::cout.width( 3 );
std::cout << getContent( j, i );
}
std::cout << std::endl;
}
}
unsigned int LevenshteinTable::createTable( DifferenceString* source, DifferenceString* destination )
{
m_source = source;
m_destination = destination;
QString s = ' ' + source->string(); // Optimization, so i dont have to subtract 1 from the indexes every
QString d = ' ' + destination->string(); // single time and add 1 to the width and height of the table
unsigned int m = s.length();
unsigned int n = d.length();
const QChar* sq = s.unicode();
const QChar* dq = d.unicode();
if ( m == 1 )
return --n;
if ( n == 1 )
return --m;
if ( !setSize( m, n ) )
return 0;
unsigned int i;
unsigned int j;
// initialize first row
for ( i = 0; i < m; ++i )
setContent( i, 0, i );
// initialize first column
for ( j = 0; j < n; ++j )
setContent( 0, j, j );
int cost = 0, north = 0, west = 0, northwest = 0;
ushort si, dj;
// Optimization, calculate row wise instead of column wise, wont trash the cache so much with large strings
for ( j = 1; j < n; ++j )
{
dj = dq[ j ];
for ( i = 1; i < m; ++i )
{
si = sq[ i ];
if ( si == dj )
cost = 0;
else
cost = 1;
north = getContent( i, j-1 ) + 1;
west = getContent( i-1, j ) + 1;
northwest = getContent( i-1, j-1 ) + cost;
setContent( i, j, kMin( north, kMin( west, northwest ) ) );
}
}
return getContent( m-1, n-1 );
}
int LevenshteinTable::chooseRoute( int c1, int c2, int c3 )
{
// kdDebug(8101) << "c1 = " << c1 << ", c2 = " << c2 << ", c3 = " << c3 << endl;
// preference order: c2, c3, c1, hopefully this will work out for me
if ( c2 <= c1 && c2 <= c3 )
return 1;
if ( c3 <= c2 && c3 <= c1 )
return 2;
return 0;
}
void LevenshteinTable::createListsOfMarkers()
{
// std::cout << source.latin1() << std::endl;
// std::cout << destination.latin1() << std::endl;
// dumpLevenshteinTable();
unsigned int x = m_width-1;
unsigned int y = m_height-1;
Marker* c = 0;
int n, nw, w, direction, currentValue;
while ( x > 0 && y > 0 )
{
currentValue = getContent( x, y );
nw = getContent( x - 1, y - 1 );
n = getContent( x, y - 1 );
w = getContent( x - 1, y );
direction = chooseRoute( n, nw, w );
switch ( direction )
{
case 0: // north
// kdDebug(8101) << "Picking north" << endl;
// kdDebug(8101) << "Source[" << ( x - 1 ) << "] = " << QString( source[ x-1 ] ) << ", destination[" << ( y - 1 ) << "] = " << QString( destination[ y-1 ] ) << endl;
if ( !m_destination->markerList().isEmpty() )
c = m_destination->markerList().first();
else
c = 0;
if ( c && c->type() == Marker::End )
{
// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
if ( n == currentValue )
m_destination->prepend( new Marker( Marker::Start, y ) );
// else: the change continues, dont do anything
}
else
{
// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
if ( n < currentValue )
m_destination->prepend( new Marker( Marker::End, y ) );
}
--y;
break;
case 1: // northwest
// kdDebug(8101) << "Picking northwest" << endl;
// kdDebug(8101) << "Source[" << ( x - 1 ) << "] = " << QString( source[ x-1 ] ) << ", destination[" << ( y - 1 ) << "] = " << QString( destination[ y-1 ] ) << endl;
if ( !m_destination->markerList().isEmpty() )
c = m_destination->markerList().first();
else
c = 0;
if ( c && c->type() == Marker::End )
{
// kdDebug(8101) << "End found: CurrentValue: " << currentValue << endl;
if ( nw == currentValue )
m_destination->prepend( new Marker( Marker::Start, y ) );
// else: the change continues, dont do anything
}
else
{
// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
if ( nw < currentValue )
m_destination->prepend( new Marker( Marker::End, y ) );
}
if ( !m_source->markerList().isEmpty() )
c = m_source->markerList().first();
else
c = 0;
if ( c && c->type() == Marker::End )
{
// kdDebug(8101) << "End found: CurrentValue: " << currentValue << endl;
if ( nw == currentValue )
m_source->prepend( new Marker( Marker::Start, x ) );
// else: the change continues, dont do anything
}
else
{
// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
if ( nw < currentValue )
m_source->prepend( new Marker( Marker::End, x ) );
}
--y;
--x;
break;
case 2: // west
// kdDebug(8101) << "Picking west" << endl;
// kdDebug(8101) << "Source[" << ( x - 1 ) << "] = " << QString( source[ x-1 ] ) << ", destination[" << ( y - 1 ) << "] = " << QString( destination[ y-1 ] ) << endl;
if ( !m_source->markerList().isEmpty() )
c = m_source->markerList().first();
else
c = 0;
if ( c && c->type() == Marker::End )
{
// kdDebug(8101) << "End found: CurrentValue: " << currentValue << endl;
if ( w == currentValue )
m_source->prepend( new Marker( Marker::Start, x ) );
// else: the change continues, dont do anything
}
else
{
// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
if ( w < currentValue )
m_source->prepend( new Marker( Marker::End, x ) );
}
--x;
break;
}
}
// kdDebug(8101) << "Source string: " << m_source->string() << endl;
// c = m_source->markerList()->first();
// QStringList list;
// unsigned int prevValue = 0;
// for ( ; c; c = m_source->markerList()->next() )
// {
// kdDebug(8101) << "Source Marker Entry : Type: " << c->type() << ", Offset: " << c->offset() << endl;
// list.append( m_source->string().mid( prevValue, c->offset() - prevValue ) );
// prevValue = c->offset();
// }
// if ( prevValue < m_source->string().length() - 1 )
// {
// list.append( m_source->string().mid( prevValue, m_source->string().length() - prevValue ) );
// }
// kdDebug(8101) << "Source Resulting stringlist : " << list.join("\n") << endl;
// list.clear();
// prevValue = 0;
// kdDebug(8101) << "Destination string: " << m_destination->string() << endl;
// for ( ; c; c = m_destination->markerList()->next() )
// {
// kdDebug(8101) << "Destination Marker Entry : Type: " << c->type() << ", Offset: " << c->offset() << endl;
// list.append( m_destination->string().mid( prevValue, c->offset() - prevValue ) );
// prevValue = c->offset();
// }
// if ( prevValue < m_destination->string().length() - 1 )
// {
// list.append( m_destination->string().mid( prevValue, m_destination->string().length() - prevValue ) );
// }
// kdDebug(8101) << "Destination Resulting string : " << list.join("\n") << endl;
}