You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdebase/khotkeys/shared/voicesignature.cpp

423 lines
9.4 KiB

/***************************************************************************
* Copyright (C) 2005 by Olivier Goffart *
* ogoffart@kde.org *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
***************************************************************************/
#include "voicesignature.h"
#include "sound.h"
#include <kconfig.h>
#include <math.h>
#ifdef PI
#undef PI
#endif
#define PI (2.0 * asin(1.0))
#include <kdebug.h>
#include <tqdatetime.h>
#undef Complex
namespace KHotKeys
{
// Absolute value of X, computed in single precision.
inline static float ABS(float X)
{
    if (X > 0)
        return X;
    return -X;
}
// Larger of two ints; Y is returned when both are equal.
// Arguments of other types are converted to int by the caller.
inline static int MAX(int X , int Y)
{
    if (X > Y)
        return X;
    return Y;
}
// Smaller of two ints; Y is returned when both are equal.
// Arguments of other types are converted to int by the caller.
inline static int MIN(int X , int Y)
{
    if (X < Y)
        return X;
    return Y;
}
/**
 * Minimal double-precision complex number used by the FFT in this file.
 *
 * The one-argument constructor is deliberately left implicit: the FFT code
 * assigns plain real values to Complex elements and relies on the conversion.
 */
class Complex
{
public:
    /** Zero. Both parts are initialised so a default-constructed value is safe to read. */
    Complex () : _re(0.0), _im(0.0) {}
    /** Real number @p re with a zero imaginary part (implicit conversion from double). */
    Complex (double re): _re(re), _im(0.0) {}
    /** Cartesian form @p re + i * @p im. */
    Complex (double re, double im): _re(re), _im(im) {}

    /** Real part. */
    double Re () const { return _re; }
    /** Imaginary part. */
    double Im () const { return _im; }

    /** In-place addition. */
    void operator += (const Complex& c)
    {
        _re += c._re;
        _im += c._im;
    }
    /** In-place subtraction. */
    void operator -= (const Complex& c)
    {
        _re -= c._re;
        _im -= c._im;
    }
    /** In-place multiplication. */
    void operator *= (const Complex& c)
    {
        // The new real part needs the old _im and the new imaginary part
        // needs the old _re, so the real part is staged in a temporary.
        const double reT = c._re * _re - c._im * _im;
        _im = c._re * _im + c._im * _re;
        _re = reT;
    }

    /** Negation; const so that it can be applied to const values and temporaries. */
    Complex operator- () const
    {
        return Complex (-_re, -_im);
    }
    /** Difference of two complex numbers. */
    Complex operator- (const Complex& c) const
    {
        return Complex (_re - c._re, _im - c._im);
    }
    /** Sum of two complex numbers. */
    Complex operator+ (const Complex& c) const
    {
        return Complex (_re + c._re, _im + c._im);
    }
    /** Product of two complex numbers. */
    Complex operator* (const Complex& c) const
    {
        return Complex (_re * c._re - _im * c._im , _im * c._re + _re * c._im);
    }

    /** Modulus: sqrt(re^2 + im^2). */
    double Mod () const { return sqrt (_re * _re + _im * _im); }

    /** Build a value from polar form: modulus @p mod and argument @p arg (radians). */
    static Complex fromExp(double mod, double arg) { return Complex(mod*cos(arg) , mod*sin(arg)); }

private:
    double _re;
    double _im;
};
/**
 * Weight of sample @p n in a Hamming window of @p size samples:
 * 0.54 - 0.46 * cos(2*pi*n / (size-1)).
 *
 * Returns 1 (no weighting) when the HAMMING switch is off, and also when the
 * window has fewer than two samples: the formula divides by size-1, which
 * would otherwise produce NaN for a one-sample window.
 */
static inline double hamming(uint n, uint size)
{
    if (!(HAMMING) || size < 2)
        return 1;
    return 0.54-0.46*cos( 2*PI*n /(size-1) );
}
// Magnitude spectrum of the samples [start, stop) of sound, computed with an
// in-place radix-2 FFT.
//
// The requested range is first resized to a power-of-two length and re-centred
// (see below), so the returned array has that power-of-two length rather than
// stop-start entries. Entry k holds the modulus of the k-th transform output
// divided by the transform length.
// Returns an empty array when start >= stop or when the sound has no samples.
static TQMemArray<double> fft(const Sound& sound, unsigned int start, unsigned int stop)
{
if(start>=stop || sound.size() == 0)
return TQMemArray<double>();
// The transform needs a power-of-two number of samples: find the smallest
// power of two that is >= the requested length.
uint size=stop-start;
unsigned short log2size=0;
while( (1<<log2size) < size )
log2size++;
// diff = number of samples by which the power-of-two window exceeds the request.
int diff=(1<<log2size) - size;
// Rounding up is rejected when it would add more than a quarter of the
// requested length, or when the window would be longer than the whole sound.
// The next lower power of two is used instead, which makes diff negative.
if(diff > size/4 || 1<<log2size > sound.size() )
{
log2size--;
diff=(1<<log2size) - size;
}
size=1<<log2size;
// Centre the power-of-two window on the requested range, then slide it back
// inside [0, sound.size()] if it sticks out on either side. stop2 is only
// needed for this clamping; the reads below use start2 and size.
int start2=start-diff/2;
int stop2=start2+ size;
if(start2<0)
{
stop2-=start2;
start2=0;
}
if(stop2>sound.size())
{
start2-= stop2 - sound.size();
stop2=sound.size();
if(start2<0)
{
stop2-=start2;
start2=0;
}
}
// Working buffer for the in-place transform.
TQMemArray<Complex> samples(size);
// Fill it with the (windowed) samples in bit-reversed order: rev_carry is a
// counter that is incremented from its most significant bit downwards, so
// output slot f receives the input sample whose index is f with its bits
// reversed.
int rev_carry = 0;
for (uint f = 0; f < size - 1; f++)
{
samples[f]=sound.at(start2+rev_carry)* hamming(rev_carry, size);
// KDEBUG(rev_carry);
int mask = size>>1; // N / 2
// add 1 backwards
while (rev_carry >= mask)
{
rev_carry -= mask; // turn off this bit
mask >>= 1;
}
rev_carry += mask;
}
// The last slot is filled outside the loop: the loop stops one short, so the
// reversed counter is never incremented past its final value size-1.
samples[size-1]=sound.at(start2+size-1)*hamming(size-1, size);
// Butterfly passes: one pass per level, size/2 butterflies per pass.
// For butterfly k at level l, the value k*2^(l+1) is split by (N-1): the
// remainder selects the first element of the pair and the quotient selects
// the exponent of the twiddle factor.
for(uint level=0; level < log2size; level++)
{
for( int k=0; k< (size>>1) ; k++)
{
uint indice1 = (k << (level+1) ) % (size-1); // (k*2*2^l)%(N-1)
uint indice2 = indice1 + (1<<level); // (k*2*2^l)%(N-1) + 2^l
uint coefW = ( k << (level+1) ) / (size-1); // (k*2*2^l) div (N-1)
double Wexpn=-2 * PI * coefW / (2 << level); // -2 pi n / 2^(l+1)
Complex W=Complex::fromExp(1, Wexpn) ;
// Butterfly: both inputs are read before either output is written.
Complex a=samples[indice1];
Complex b=samples[indice2];
samples[indice1]=a+W*b;
samples[indice2]=a-W*b;
// kdDebug() << k_funcinfo << "PAPILLON s_" << indice1 << " s_" << indice2 << " W_" << (2<<level) << "^" << coefW << endl;
}
}
// Keep only the modulus of each output, normalised by the transform length.
TQMemArray<double> result(size);
for(uint f=0;f<size;f++)
{
result[f]=samples[f].Mod() / size;
}
return result;
}
/**
 * Spectrum of the samples [start, stop) of @p sound.
 *
 * Forwards to the file-local KHotKeys::fft() and returns its result unchanged.
 *
 * An earlier implementation, previously kept here as commented-out code,
 * evaluated a direct DFT over 8000 fixed bins and divided each modulus by
 * (stop - start).
 */
TQMemArray<double> VoiceSignature::fft(const Sound& sound, unsigned int start, unsigned int stop)
{
    TQMemArray<double> spectrum = KHotKeys::fft(sound, start, stop);
    return spectrum;
}
bool VoiceSignature::window(const Sound& sound, unsigned int *_start, unsigned int *_stop)
{
bool isNoise=false;
unsigned int length=sound.size();
uint unit=WINDOW_UNIT;
if(length < unit )
return false;
//Fen<65>trage
unsigned int start=0 , stop=0;
double moy=0;
for(uint x=0;x<unit;x++)
{
moy+=ABS(sound.at(x));
}
if(moy>WINDOW_MINIMUM*unit)
isNoise=true;
for(uint x=unit; x<length; x++)
{
if(moy<WINDOW_MINIMUM*unit)
{
if(stop==0)
start=x-unit/2;
}
else
stop=x-unit/2;
moy+=ABS(sound.at(x));
moy-=ABS(sound.at(x-unit));
}
if(moy>WINDOW_MINIMUM*unit && isNoise)
return false;
stop=MIN(length,stop+WINDOW_MINIMUM_ECART);
start=MAX(0 ,start-WINDOW_MINIMUM_ECART);
if(_start)
*_start=start;
if(_stop)
*_stop=stop;
return start<stop;
}
// Hz <-> mel conversion hooks applied to the frequency range before it is
// split into bands. The mel-scale formulas (kept below, disabled) did not
// give better results in the end, so both macros are currently the identity.
/*#define HZ_TO_MEL(F) (1127*log(1+(F)/700.0))
#define MEL_TO_HZ(M) ( ( exp((M)/1127.0) -1) *700 )*/
#define HZ_TO_MEL(F) (F)
#define MEL_TO_HZ(F) (F)
/**
 * Build the signature of @p sound.
 *
 * window() is asked for the voiced range first. When it reports none, a
 * warning is logged and the constructor returns without writing to data.
 * Otherwise the range is cut into WINDOW_NUMBER slices, each widened by
 * WINDOW_SUPER slice lengths on both sides and clipped to the voiced range.
 * The spectrum of every slice is computed with fft(), and the part between
 * FFT_RANGE_INF and FFT_RANGE_SUP is split into FOUR_NUMBER bands.
 * data[slice][band] receives the weighted mean of the spectrum bins that
 * fall into that band.
 */
VoiceSignature::VoiceSignature(const Sound& sound)
{
    // Accumulated durations of the three phases; they are only read by the
    // commented-out debug statement at the end of this function.
    static uint temp_wind=0, temp_fft=0, temp_moy=0;
    TQTime stopwatch;
    stopwatch.start();

    unsigned int voiceBegin , voiceEnd;
    if (!window(sound, &voiceBegin, &voiceEnd))
    {
        kdWarning( 1217 ) << k_funcinfo << "No voice found in the sound" << endl ;
        return;
    }
    temp_wind += stopwatch.restart();

    const uint voiceLength = voiceEnd - voiceBegin;
    int slice = 0;
    while (slice < WINDOW_NUMBER)
    {
        // Overlapping slice, clipped to the voiced range by MAX/MIN.
        unsigned int sliceBegin = MAX(voiceBegin, voiceBegin+ (int)((slice - WINDOW_SUPER)*voiceLength/WINDOW_NUMBER));
        unsigned int sliceEnd = MIN(voiceEnd , voiceBegin+ (int)((slice+1.0+WINDOW_SUPER)*voiceLength/WINDOW_NUMBER));
        TQMemArray<double> spectrum = fft(sound, sliceBegin, sliceEnd);
        temp_fft += stopwatch.restart();

        // MEL conversion of the analysed frequency range.
        double rangeLow = HZ_TO_MEL(FFT_RANGE_INF);
        uint rangeHigh = HZ_TO_MEL(FFT_RANGE_SUP);
        for (int band = 0; band < FOUR_NUMBER; ++band)
        {
            // Band limits on the (mel) frequency axis ...
            unsigned int bandLow = rangeLow + band*(rangeHigh-rangeLow)/FOUR_NUMBER;
            unsigned int bandHigh = rangeLow + (band+1)*(rangeHigh-rangeLow)/FOUR_NUMBER;
            // ... converted to indices into the spectrum.
            unsigned int binFirst = MEL_TO_HZ( bandLow )*spectrum.size()/sound.fs();
            unsigned int binLast = MEL_TO_HZ( bandHigh )*spectrum.size()/sound.fs();
            unsigned int binCount = binLast - binFirst;

            double sum = 0;
            for (uint bin = binFirst; bin < binLast; ++bin)
            {
                // NOTE(review): this applies the Hz -> bin scaling to a value
                // that is already a bin index; bin*fs/size looks like what
                // was intended. Left as is - confirm before changing.
                int freq = bin*spectrum.size()/sound.fs();
                sum += spectrum[bin]*FFT_PONDERATION(freq);
            }
            // NOTE(review): binCount is 0 when the band covers no bin, which
            // makes this a division by zero - confirm whether that can occur.
            data[slice][band] = sum/binCount;
        }
        temp_moy += stopwatch.restart();
        ++slice;
    }
    // kdDebug( 1217 ) << k_funcinfo << "wind: "<< temp_wind << " - fft: " << temp_fft << " - moy: " << temp_moy << endl;
}
// Destructor with an empty body: no explicit cleanup is performed here.
VoiceSignature::~VoiceSignature()
{
}
float VoiceSignature::diff(const VoiceSignature &s1, const VoiceSignature &s2)
{
if(s1.isNull() || s2.isNull())
return 1000000;
#if 0
double result=0;
for(int x=0;x<WINDOW_NUMBER;x++)
for(int y=0;y<FOUR_NUMBER;y++)
{
double d1=s1.data[x][y]-s2.data[x][y];
result+= d1*d1;//*pond[x][y];
}
return result;
#endif
//DTW
// http://tcts.fpms.ac.be/cours/1005-08/speech/projects/2001/delfabro_henry_poitoux/
const int I=WINDOW_NUMBER;
const int J=WINDOW_NUMBER;
double g[I+1][J+1];
for(int f=1;f<=J;f++)
g[0][f]=10000000;
for(int f=1;f<=I;f++)
g[f][0]=10000000;
g[0][0]=0;
for(int i=1;i<=I;i++)
for(int j=1;j<=J;j++)
{
double d=0;
for(int f=0;f<FOUR_NUMBER;f++)
{
double d1=s1.data[i-1][f]-s2.data[j-1][f];
d+= d1*d1;//*pond[x][y];
}
d=sqrt(d);
g[i][j]=QMIN(QMIN( g[i-1][j]+d, g[i][j-1]+d ) , g[i-1][j-1]+d+d );
}
return g[I][J]/(I+J);
}
// Returns WINDOW_NUMBER, the bound used in this file for the first index of data.
int VoiceSignature::size1()
{
return WINDOW_NUMBER;
}
// Returns FOUR_NUMBER, the bound used in this file for the second index of data.
int VoiceSignature::size2()
{
return FOUR_NUMBER;
}
// Definition of the static member VoiceSignature::pond. In the visible part of
// this file it is only mentioned in commented-out code, as a per-cell factor
// pond[x][y] - presumably a weighting table; verify against the header.
TQMap<int, TQMap<int, double> > VoiceSignature::pond;
/**
 * Store the signature in @p cfg under @p key.
 *
 * The WINDOW_NUMBER x FOUR_NUMBER values of data are written as one string
 * list in row-major order (all bands of window 0, then window 1, ...), each
 * value formatted with TQString::number().
 */
void VoiceSignature::write(KConfigBase *cfg, const TQString &key) const
{
    const int rows = WINDOW_NUMBER;
    const int cols = FOUR_NUMBER;

    TQStringList values;
    for (int cell = 0; cell < rows*cols; ++cell)
        values.append( TQString::number(data[cell / cols][cell % cols]) );

    cfg->writeEntry(key, values);
}
/**
 * Load the signature stored in @p cfg under @p key.
 *
 * The entry is expected to be a string list holding the
 * WINDOW_NUMBER x FOUR_NUMBER values of data in row-major order (all bands
 * of window 0, then window 1, ...).
 *
 * The list is never indexed past its end: when the entry is missing or
 * shorter than expected, the cells without a stored value are set to 0.0.
 */
void VoiceSignature::read(KConfigBase *cfg, const TQString &key)
{
    TQStringList sl=cfg->readListEntry(key);
    const uint available = sl.count();
    const int rows = WINDOW_NUMBER;
    const int cols = FOUR_NUMBER;
    for(int x=0;x<rows;x++)
        for(int y=0;y<cols;y++)
        {
            const uint idx = static_cast<uint>(x*cols+y);
            // Guard against a missing or truncated config entry.
            data[x][y]= (idx < available) ? sl[idx].toDouble() : 0.0;
        }
}
}