Final commit to branch issue/270/tdelibs-V3.

It includes the following files that were mistakenly omitted in that commit:
  tdecore/CMakeLists.txt
  tdecore/README.tdestringmatcher
  tdecore/tdeglobal.cpp
  tdecore/tdeglobal.h
  tdecore/tdestringmatcher.cpp
  tdecore/tdestringmatcher.h
  tdeio/tdeio/tdefileitem.cpp
  tdeio/tdeio/tdefileitem.h

It also includes updates to the following files, some of which are based on recent feedback from @MicheleC:
  tdecore/tequivchars-mapping.h
  tdecore/tequivchars.cpp
  tdecore/tequivchars.h

Signed-off-by: Vincent Reher <tde@4reher.org>

issue/270/tdelibs-V4
parent
4c0dae60b2
commit
a39403fb8b
@ -0,0 +1,649 @@
|
|||||||
|
#include "tdestringmatcher.h"
|
||||||
|
#include "tequivchars.h"
|
||||||
|
|
||||||
|
#include <tdeglobal.h>
|
||||||
|
#include <tqregexp.h>
|
||||||
|
#include <kdebug.h>
|
||||||
|
|
||||||
|
#include <features.h>
|
||||||
|
#ifdef __GLIBC__
|
||||||
|
#include <fnmatch.h>
|
||||||
|
#pragma message "TSM using GLIBC fnmatch() for wildcard matching"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//================================================================================================
|
||||||
|
|
||||||
|
namespace TSM {
|
||||||
|
|
||||||
|
class AuxData
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
AuxData();
|
||||||
|
TQString patternConverted; // Pattern converted from original (e.g ANCHandling::EQUIVALENCE)
|
||||||
|
TQRegExp* matchEngine; // Used when PatternType::REGEX
|
||||||
|
#ifdef __GLIBC__
|
||||||
|
int fnmatchFlags; // Used by fnmatch() when PatternType::WILDCARD
|
||||||
|
#endif
|
||||||
|
bool isCaseSensitive; // PatternType::SUBSTRING
|
||||||
|
};
|
||||||
|
|
||||||
|
AuxData::AuxData()
|
||||||
|
{
|
||||||
|
isCaseSensitive = true;
|
||||||
|
#ifdef __GLIBC__
|
||||||
|
fnmatchFlags = FNM_EXTMATCH; // Bash shell option 'extglob'
|
||||||
|
#endif
|
||||||
|
matchEngine = nullptr;
|
||||||
|
patternConverted = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
} // End of namespace TSM
|
||||||
|
|
||||||
|
//================================================================================================
|
||||||
|
|
||||||
|
using namespace TSM;
|
||||||
|
|
||||||
|
typedef TQValueVector<AuxData> AuxDataList;
|
||||||
|
|
||||||
|
class TDEStringMatcher::TDEStringMatcherPrivate {
|
||||||
|
public:
|
||||||
|
|
||||||
|
// Properties that may be set / accessed through the TSM interface
|
||||||
|
TQString m_matchSpecString;
|
||||||
|
MatchSpecList m_matchSpecList;
|
||||||
|
|
||||||
|
// Properties that are internal implementation only
|
||||||
|
AuxDataList m_auxData;
|
||||||
|
void clearAll();
|
||||||
|
};
|
||||||
|
|
||||||
|
void TDEStringMatcher::TDEStringMatcherPrivate::clearAll()
|
||||||
|
{
|
||||||
|
m_matchSpecString = "";
|
||||||
|
m_matchSpecList.clear();
|
||||||
|
for ( size_t index = 0 ; index < m_auxData.count() ; index++ ) {
|
||||||
|
if ( m_auxData[index].matchEngine != nullptr ) {
|
||||||
|
TSMTRACE << "Freeing match engine " << m_auxData[index].matchEngine << endl;
|
||||||
|
delete m_auxData[index].matchEngine;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m_auxData.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
//================================================================================================
|
||||||
|
|
||||||
|
// Creates a matcher with no match specifications and allocates its
// private data object.
TDEStringMatcher::TDEStringMatcher()
  : d( new TDEStringMatcherPrivate )
{
    TSMTRACE << "TSM::TDEStringMatcher(): New instance created: " << this << endl;
}
|
||||||
|
|
||||||
|
// Releases everything owned by the private data object (including any
// allocated regex engines) before deleting the object itself.
TDEStringMatcher::~TDEStringMatcher()
{
    // The private class has no destructor of its own, so the engines
    // must be freed explicitly first.
    d->clearAll();
    delete d;

    TSMTRACE << "TSM::~TDEStringMatcher(): Instance destroyed: " << this << endl;
}
|
||||||
|
|
||||||
|
|
||||||
|
//================================================================================================
|
||||||
|
// Match specification output functions
|
||||||
|
//================================================================================================
|
||||||
|
|
||||||
|
// Returns a copy of the string that encodes the current match
// specifications (empty when none are defined).
const TQString TDEStringMatcher::getMatchSpecString() const
{
    const TQString& encodedSpecs = d->m_matchSpecString;
    return encodedSpecs;
}
|
||||||
|
|
||||||
|
// Returns a copy of the list of current match specifications.
const MatchSpecList TDEStringMatcher::getMatchSpecs() const
{
    const MatchSpecList& currentSpecs = d->m_matchSpecList;
    return currentSpecs;
}
|
||||||
|
|
||||||
|
|
||||||
|
//================================================================================================
|
||||||
|
// Match specification input functions
|
||||||
|
//================================================================================================
|
||||||
|
|
||||||
|
/*
  Replace the current match specifications with those in newMatchSpecList.

  Every specification is validated first; the new set is assembled in a
  scratch private object and only swapped in when all of them are good.
  On the first invalid specification the scratch data is discarded, the
  current specifications are left untouched and false is returned.

  A specification is rejected when its pattern is empty, when the pattern
  contains the reserved separator character (PatterStringDivider), when its
  pattern type or character handling is out of range, or when the pattern is
  not valid wildcard / regex syntax.

  On success the equivalent encoded string is regenerated, patternsChanged()
  is emitted and true is returned.
*/
bool TDEStringMatcher::setMatchSpecs( MatchSpecList newMatchSpecList )
{
    TDEStringMatcherPrivate staging; // becomes *d only if everything validates
    TQStringList encodedParts;       // alternating option string / pattern

    TSMTRACE << "TSM::setPatterns(): validating match specification list" << endl;

    for ( const MatchSpec& spec : newMatchSpecList ) {

        if ( spec.pattern.isEmpty() ) {
            TSMTRACE << " Error: empty pattern!" << endl;
            staging.clearAll();
            return false;
        }
        if ( spec.pattern.find( TQChar(PatterStringDivider) ) >= 0 ) {
            TSMTRACE << " Error: pattern contains reserved separator character" << endl;
            staging.clearAll();
            return false;
        }

        AuxData aux;
        TQString optionCodes; // option string equivalent to this specification

        // Validate / process PatternType

        aux.patternConverted = spec.pattern;
        if ( spec.patternType == PatternType::WILDCARD ) {
            optionCodes += TQChar('w');
#ifndef __GLIBC__
            // Without fnmatch() wildcards are matched through the regex engine
            aux.patternConverted = wildcardToRegex( aux.patternConverted );
            TSMTRACE << " Converted wildcard expression '" << spec.pattern << "' to regex '" << aux.patternConverted << "'" << endl;
#endif
        }
        else if ( spec.patternType == PatternType::REGEX ) {
            optionCodes += TQChar('r');
        }
        else if ( spec.patternType == PatternType::SUBSTRING ) {
            optionCodes += TQChar('s');
        }
        else {
            TSMTRACE << " Error: pattern type out of range" << endl;
            staging.clearAll();
            return false;
        }

        // Validate / process ANCHandling

        if ( spec.ancHandling == ANCHandling::CASE_SENSITIVE ) {
            optionCodes += TQChar('c');
            aux.isCaseSensitive = true;
        }
        else if ( spec.ancHandling == ANCHandling::CASE_INSENSITIVE ) {
            optionCodes += TQChar('i');
            aux.isCaseSensitive = false;
#ifdef __GLIBC__
            aux.fnmatchFlags |= FNM_CASEFOLD;
#endif
        }
        else if ( spec.ancHandling == ANCHandling::EQUIVALENCE ) {
            const TQString beforeFolding = aux.patternConverted;
            optionCodes += TQChar('e');
            aux.isCaseSensitive = true;
            aux.patternConverted = TDEGlobal::equivChars()->replaceChars( aux.patternConverted, true );
            TSMTRACE << " Converted match pattern '" << beforeFolding << "' to equivalent '" << aux.patternConverted << "'" << endl;
        }
        else {
            TSMTRACE << " Error: alphabetic character handling specification out of range" << endl;
            staging.clearAll();
            return false;
        }

        optionCodes += TQChar( spec.expectMatch ? '=' : '!' );

        // Test validity of pattern

        // Regex patterns always get a regex engine. Wildcard patterns get one
        // only when fnmatch() is unavailable (they were converted to a regex above).
        bool needsRegexEngine = ( spec.patternType == PatternType::REGEX );

        if ( spec.patternType == PatternType::WILDCARD ) {
#ifdef __GLIBC__
            // Test wildcard expression using a subject matter expert: the
            // pattern is matched against itself, and any status other than
            // "matched" / "not matched" means fnmatch() rejected it.
            const int fnmatchStatus = fnmatch(
                aux.patternConverted.local8Bit().data(),
                aux.patternConverted.local8Bit().data(),
                aux.fnmatchFlags
            );
            if ( fnmatchStatus != 0 && fnmatchStatus != FNM_NOMATCH ) {
                TSMTRACE << " Error: invalid wildcard syntax" << endl;
                staging.clearAll();
                return false;
            }
#else
            needsRegexEngine = true;
#endif
        }

        if ( needsRegexEngine ) {
            // Prepare regex
            TQRegExp candidate;
            candidate.setPattern( aux.patternConverted );
            candidate.setCaseSensitive( aux.isCaseSensitive );

            // Test regex
            if ( ! candidate.isValid() ) {
                TSMTRACE << " Error: invalid regex syntax'" << endl;
                staging.clearAll();
                return false;
            }
            aux.matchEngine = new TQRegExp;
            *aux.matchEngine = candidate;
            TSMTRACE << "AuxData: Allocated regex engine for matching '" << aux.matchEngine->pattern() << "'" << endl;
        }

        // This particular match specification is good

        encodedParts.append( optionCodes );
        encodedParts.append( spec.pattern );
        staging.m_auxData.append( aux );
    }

    // All proposed match specifications are good, update everything accordingly

    staging.m_matchSpecList = newMatchSpecList;
    staging.m_matchSpecString = encodedParts.join( TQChar(PatterStringDivider) );

    d->clearAll();  // frees the engines of the specifications being replaced
    *d = staging;   // the engines allocated above now belong to *d

    emit patternsChanged();
    TSMTRACE << "TSM::setPatterns(): Patterns were successfully regenerated from list" << endl << endl;
    return true;
}
|
||||||
|
|
||||||
|
//=================================================================================================
|
||||||
|
|
||||||
|
/*
  Replace the current match specifications with those encoded in
  newMatchSpecString.

  The string consists of components separated by PatterStringDivider that
  alternate between an option string and a pattern string. Option characters
  (case-insensitive) are:

      r / w / s   pattern is a regex / wildcard / substring
      c / i / e   case sensitive / case insensitive / character equivalence
      = / !       pattern is expected to match / not to match

  Options are sticky: a pattern inherits whatever earlier option strings
  established unless its own option string overrides them.

  Returns true when the string equals the current one (nothing is done), when
  it is empty (all specifications are cleared), or when every component was
  valid and the new specifications were installed. Returns false, leaving the
  current specifications untouched, when the number of components is odd, an
  option character is unknown, a pattern is empty, or a pattern is not valid
  wildcard / regex syntax.

  patternsChanged() is emitted whenever the specifications were cleared or
  replaced.
*/
bool TDEStringMatcher::setMatchSpecs( TQString newMatchSpecString )
{
    if ( newMatchSpecString == d->m_matchSpecString )
        return true;

    TDEStringMatcherPrivate workArea; // becomes *d only if everything validates

    MatchSpec matchSpec = {
        PatternType::DEFAULT,
        ANCHandling::DEFAULT,
        true, // seeking matches, not non-matches
        ""
    };

    TSMTRACE << "TSM::setPatterns: Proposed match specification string: <" << newMatchSpecString << ">" << endl;

    if ( newMatchSpecString.isEmpty() ) {
        TSMTRACE << " Empty pattern string => match specifications will be cleared" << endl;
        // clearAll() also releases the regex engines and auxiliary data of the
        // old specifications, so no stale per-pattern state is left behind.
        d->clearAll();
        emit patternsChanged();
        return true;
    }

    TQStringList newMatchSpecs = TQStringList::split( PatterStringDivider, newMatchSpecString, true );

    if ( newMatchSpecs.count() % 2 != 0 ) {
        TSMTRACE << " Error: match specification string must contain an even number of components" << endl;
        return false;
    }

    bool processingOptionString = true; // expected format: option string , pattern string, ...

    for ( TQString &specification : newMatchSpecs ) {

        if ( processingOptionString ) {
            specification = specification.lower();
            TSMTRACE << " Processing match option string: '" << specification << "'" << endl;

            const int optionCount = static_cast<int>( specification.length() );
            for ( int i = 0 ; i < optionCount ; i++ ) {

                TQChar optionChar = specification[i];

                switch ( optionChar ) {
                    case 'r' : matchSpec.patternType = PatternType::REGEX ; break;
                    case 'w' : matchSpec.patternType = PatternType::WILDCARD ; break;
                    case 's' : matchSpec.patternType = PatternType::SUBSTRING ; break;
                    case 'c' : matchSpec.ancHandling = ANCHandling::CASE_SENSITIVE ; break;
                    case 'i' : matchSpec.ancHandling = ANCHandling::CASE_INSENSITIVE; break;
                    case 'e' : matchSpec.ancHandling = ANCHandling::EQUIVALENCE ; break;
                    case '=' : matchSpec.expectMatch = true ; break;
                    case '!' : matchSpec.expectMatch = false ; break;
                    default:
                        // We reserve ALL other possible option characters for future use!
                        TSMTRACE << " Error: invalid option character" << endl;
                        workArea.clearAll();
                        return false;
                }
            }

            processingOptionString = false; // next spec should be a pattern string

        } // processingOptionString

        else { // ! processingOptionString

            TSMTRACE << " Processing match pattern string: '" << specification << "'" << endl;

            if ( specification.isEmpty() ) {
                TSMTRACE << " Error: empty pattern!" << endl;
                workArea.clearAll();
                return false;
            }

            AuxData auxWork;

            // Validate / process PatternType

            auxWork.patternConverted = specification;
            switch ( matchSpec.patternType ) {
                case PatternType::WILDCARD :
#ifndef __GLIBC__
                    // Without fnmatch() wildcards are matched through the regex engine
                    auxWork.patternConverted = wildcardToRegex( specification );
                    TSMTRACE << " Converted wildcard expression '" << specification << "' to regex '" << auxWork.patternConverted << "'" << endl;
                    break;
#endif
                case PatternType::REGEX :
                case PatternType::SUBSTRING :
                    break;
                default :
                    // This should never arise since the content of this field was set within this function
                    kdWarning() << "Error while processing '" << specification
                                << "' pattern type out of range: " << static_cast<uchar>( matchSpec.patternType )
                                << endl;
                    workArea.clearAll();
                    return false;
            }

            // Validate / process ANCHandling

            TQString before = auxWork.patternConverted;
            switch ( matchSpec.ancHandling ) {
                case ANCHandling::CASE_SENSITIVE :
                    auxWork.isCaseSensitive = true;
                    break;
                case ANCHandling::CASE_INSENSITIVE :
                    auxWork.isCaseSensitive = false;
#ifdef __GLIBC__
                    auxWork.fnmatchFlags |= FNM_CASEFOLD;
#endif
                    break;
                case ANCHandling::EQUIVALENCE :
                    auxWork.isCaseSensitive = true;
                    auxWork.patternConverted = TDEGlobal::equivChars()->replaceChars( auxWork.patternConverted, true );
                    TSMTRACE << " Converted match pattern '" << before << "' to equivalent '" << auxWork.patternConverted << "'" << endl;
                    break;
                default:
                    // This should never arise since the content of this field was set within this function.
                    // (A stray 'break' used to precede this code and made it unreachable.)
                    kdWarning() << "Error while processing '" << specification
                                << "' alphabetic character handling specification out of range: " << static_cast<uchar>( matchSpec.ancHandling )
                                << endl;
                    workArea.clearAll();
                    return false;
            }

            // Test validity of pattern

            TQRegExp rxWork;
#ifdef __GLIBC__
            int result;
#endif

            switch ( matchSpec.patternType ) {
                case PatternType::WILDCARD :
#ifdef __GLIBC__ // Test wildcard expression using a subject matter expert
                    // The pattern is matched against itself; any status other than
                    // "matched" / "not matched" means fnmatch() rejected it.
                    result = fnmatch(
                        auxWork.patternConverted.local8Bit().data(),
                        auxWork.patternConverted.local8Bit().data(),
                        auxWork.fnmatchFlags
                    );
                    switch ( result ) {
                        case 0: // matched
                        case FNM_NOMATCH: // not matched
                            break;
                        default:
                            TSMTRACE << " Error: invalid wildcard syntax" << endl;
                            workArea.clearAll();
                            return false;
                    }
                    break;
#endif // Otherwise we will test wildcard expression as one converted to a regex
                case PatternType::REGEX :
                    // Prepare regex
                    rxWork.setPattern( auxWork.patternConverted );
                    rxWork.setCaseSensitive( auxWork.isCaseSensitive );
                    // Test regex
                    if ( rxWork.isValid() ) {
                        auxWork.matchEngine = new TQRegExp;
                        *auxWork.matchEngine = rxWork;
                        TSMTRACE << " AuxData: Allocated regex engine " << auxWork.matchEngine << "for pattern: " << auxWork.matchEngine->pattern() << endl;
                    }
                    else {
                        TSMTRACE << " Error: invalid regex syntax" << endl;
                        workArea.clearAll();
                        return false;
                    }
                    break;
                default:
                    // Substring patterns need no syntax check
                    break;
            }

            matchSpec.pattern = specification;
            workArea.m_matchSpecList.push_back( matchSpec );
            workArea.m_auxData.append( auxWork );

            processingOptionString = true; // next spec should be an option string

        } // ! processingOptionString completed
    }

    workArea.m_matchSpecString = newMatchSpecString;

    d->clearAll();  // frees the engines of the specifications being replaced
    *d = workArea;  // the engines allocated above now belong to *d

    TSMTRACE << " Final patternString: '" << d->m_matchSpecString << "'" << endl;
    TSMTRACE << " Number of match patterns in list: '" << d->m_matchSpecList.count() << "'" << endl;

    emit patternsChanged();

    TSMTRACE << "TSM::setPatterns(): Patterns were successfully regenerated from string" << endl << endl;
    return true;
}
|
||||||
|
|
||||||
|
//================================================================================================
|
||||||
|
// Match functions
|
||||||
|
//================================================================================================
|
||||||
|
|
||||||
|
bool TDEStringMatcher::matchAny( const TQString& stringToMatch ) const
|
||||||
|
{
|
||||||
|
/* DEBUG
|
||||||
|
TSMTRACE << "TSM:matchAny(): Attempting to match string '" << stringToMatch << "' against stored patterns" << endl;
|
||||||
|
if ( d->m_matchSpecList.isEmpty() ) {
|
||||||
|
//-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
TQString equivalentString;
|
||||||
|
|
||||||
|
for ( size_t index = 0 ; index < d->m_matchSpecList.count() ; index++ )
|
||||||
|
{
|
||||||
|
TQString matchWhat = stringToMatch;
|
||||||
|
TQString matchThis = d->m_auxData[index].patternConverted;
|
||||||
|
|
||||||
|
if ( d->m_matchSpecList[index].ancHandling == ANCHandling::EQUIVALENCE ) {
|
||||||
|
if ( equivalentString.isEmpty() ) {
|
||||||
|
equivalentString = TDEGlobal::equivChars()->replaceChars( stringToMatch, false ) ;
|
||||||
|
}
|
||||||
|
matchWhat = equivalentString;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool matchFound = false;
|
||||||
|
switch ( d->m_matchSpecList[index].patternType ) {
|
||||||
|
case PatternType::WILDCARD :
|
||||||
|
#ifdef __GLIBC__
|
||||||
|
matchFound = ( fnmatch(
|
||||||
|
matchThis.local8Bit().data(),
|
||||||
|
matchWhat.local8Bit().data(),
|
||||||
|
d->m_auxData[index].fnmatchFlags
|
||||||
|
) == 0 );
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
case PatternType::REGEX :
|
||||||
|
matchFound = ( d->m_auxData[index].matchEngine->search( matchWhat ) >= 0 );
|
||||||
|
break;
|
||||||
|
case PatternType::SUBSTRING :
|
||||||
|
matchFound = ( matchWhat.find( matchThis, 0, d->m_auxData[index].isCaseSensitive ) >= 0 );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( matchFound == d->m_matchSpecList[index].expectMatch ) {
|
||||||
|
TSMTRACE << " Success! match of pattern '" << matchThis << "' against '" << matchWhat << "' turned out as expected" << endl;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TSMTRACE << " Match failed, there were no pattern matches against '" << stringToMatch << "' that turned out as expected" << endl;
|
||||||
|
return false ;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TDEStringMatcher::matchAll( const TQString& stringToMatch ) const
|
||||||
|
{
|
||||||
|
TSMTRACE << "TSM:matchAll(): Attempting to match string '" << stringToMatch << "' against stored patterns" << endl;
|
||||||
|
if ( d->m_matchSpecList.isEmpty() ) {
|
||||||
|
//-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
TQString equivalentString;
|
||||||
|
|
||||||
|
for ( size_t index = 0 ; index < d->m_matchSpecList.count() ; index++ )
|
||||||
|
{
|
||||||
|
TQString matchWhat = stringToMatch;
|
||||||
|
TQString matchThis = d->m_auxData[index].patternConverted;
|
||||||
|
|
||||||
|
if ( d->m_matchSpecList[index].ancHandling == ANCHandling::EQUIVALENCE ) {
|
||||||
|
if ( equivalentString.isEmpty() ) {
|
||||||
|
equivalentString = TDEGlobal::equivChars()->replaceChars( stringToMatch, false ) ;
|
||||||
|
}
|
||||||
|
matchWhat = equivalentString;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool matchFound = false;
|
||||||
|
switch ( d->m_matchSpecList[index].patternType ) {
|
||||||
|
case PatternType::WILDCARD :
|
||||||
|
#ifdef __GLIBC__
|
||||||
|
matchFound = ( fnmatch(
|
||||||
|
matchThis.local8Bit().data(),
|
||||||
|
matchWhat.local8Bit().data(),
|
||||||
|
d->m_auxData[index].fnmatchFlags
|
||||||
|
) == 0 );
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
case PatternType::REGEX :
|
||||||
|
matchFound = ( d->m_auxData[index].matchEngine->search( matchWhat ) >= 0 );
|
||||||
|
break;
|
||||||
|
case PatternType::SUBSTRING :
|
||||||
|
matchFound = ( matchWhat.find( matchThis, 0, d->m_auxData[index].isCaseSensitive ) >= 0 );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( matchFound != d->m_matchSpecList[index].expectMatch ) {
|
||||||
|
TSMTRACE << " Match of pattern '" << matchThis << "' against '" << matchWhat << "' did not turn out as expected" << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TSMTRACE << " Expected pattern matching succeeded" << endl;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
//================================================================================================
|
||||||
|
// Utility functions
|
||||||
|
//================================================================================================
|
||||||
|
|
||||||
|
/*
|
||||||
|
The following code is a modified copy of that found in tqt3/src/tools/qregexp.cpp.
|
||||||
|
We export this as utility function for applications that wish to convert a basic
|
||||||
|
wildcard expression into a basic regular expression. TSM will not use this unless
|
||||||
|
GLIBC fnmatch() is not available.
|
||||||
|
*/
|
||||||
|
TQString TDEStringMatcher::wildcardToRegex( const TQString& wildcardPattern )
|
||||||
|
{
|
||||||
|
int wclen = wildcardPattern.length();
|
||||||
|
TQString rx = TQString::fromLatin1( "" );
|
||||||
|
int i = 0;
|
||||||
|
const TQChar *wc = wildcardPattern.unicode();
|
||||||
|
while ( i < wclen ) {
|
||||||
|
TQChar c = wc[i++];
|
||||||
|
switch ( c.unicode() ) {
|
||||||
|
case '*':
|
||||||
|
rx += TQString::fromLatin1( ".*" );
|
||||||
|
break;
|
||||||
|
case '?':
|
||||||
|
rx += TQChar( '.' );
|
||||||
|
break;
|
||||||
|
case '$':
|
||||||
|
case '(':
|
||||||
|
case ')':
|
||||||
|
case '+':
|
||||||
|
case '.':
|
||||||
|
case '\\':
|
||||||
|
case '^':
|
||||||
|
case '{':
|
||||||
|
case '|':
|
||||||
|
case '}':
|
||||||
|
rx += TQChar( '\\' );
|
||||||
|
rx += c;
|
||||||
|
break;
|
||||||
|
case '[':
|
||||||
|
rx += c;
|
||||||
|
/* This is not correct, POSIX states that negation character is '!'
|
||||||
|
if ( wc[i] == TQChar('^') )
|
||||||
|
rx += wc[i++];
|
||||||
|
*/
|
||||||
|
if ( wc[i] == TQChar('!') ) {
|
||||||
|
rx += TQChar('^');
|
||||||
|
i++;
|
||||||
|
} else if ( wc[i] == TQChar('^') ) {
|
||||||
|
rx += TQChar( '\\' );
|
||||||
|
rx += wc[i++];
|
||||||
|
}
|
||||||
|
if ( i < wclen ) {
|
||||||
|
if ( rx[i] == ']' )
|
||||||
|
rx += wc[i++];
|
||||||
|
while ( i < wclen && wc[i] != TQChar(']') ) {
|
||||||
|
if ( wc[i] == '\\' )
|
||||||
|
rx += TQChar( '\\' );
|
||||||
|
rx += wc[i++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
rx += c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Wildcard patterns must match entire string */
|
||||||
|
return TQChar('^') + rx + TQChar('$');
|
||||||
|
/* TBD: Add support for extglob */
|
||||||
|
}
|
||||||
|
/*
  Returns a copy of basicString in which every character that is special in
  a regular expression ( + . ^ ( ) [ ] { } | $ ? * \ ) is preceded by a
  backslash, so that the result matches basicString literally.

  This is the definition of the public member declared in tdestringmatcher.h.
  It was previously defined as a file-local free function, which left the
  declared member without a definition.

  @param basicString text to be taken literally
  @return basicString with all regex metacharacters escaped
*/
TQString TDEStringMatcher::escapeRegexChars( const TQString& basicString )
{
    const int inputLength = basicString.length();
    const TQChar *input = basicString.unicode();
    TQString outputString = TQString::fromLatin1( "" );

    for ( int i = 0 ; i < inputLength ; i++ ) {
        const TQChar c = input[i];
        switch ( c.unicode() ) {
            case '+':
            case '.':
            case '^':
            case '(':
            case ')':
            case '[':
            case ']':
            case '{':
            case '}':
            case '|':
            case '$':
            case '?':
            case '*':
            case '\\':
                outputString += TQChar( '\\' );
                outputString += c;
                break;
            default:
                outputString += c;
        }
    }
    return outputString;
}
|
||||||
|
|
||||||
|
//================================================================================================
|
||||||
|
|
||||||
|
#include "tdestringmatcher.moc"
|
@ -0,0 +1,134 @@
|
|||||||
|
#ifndef TDESTRINGMATCHER_H
|
||||||
|
#define TDESTRINGMATCHER_H
|
||||||
|
|
||||||
|
#include "tdelibs_export.h"
|
||||||
|
|
||||||
|
#include <tqobject.h>
|
||||||
|
#include <tqvaluevector.h>
|
||||||
|
|
||||||
|
#define TSMTRACE kdWarning() << "<TSMTRACE> "
|
||||||
|
|
||||||
|
namespace TSM
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Enumeration used by the TDEStringMatcher class
|
||||||
|
* defining types of patterns to be matched
|
||||||
|
*/
|
||||||
|
enum class PatternType: uchar
|
||||||
|
{
|
||||||
|
REGEX,
|
||||||
|
WILDCARD,
|
||||||
|
SUBSTRING,
|
||||||
|
//OTHER,
|
||||||
|
DEFAULT = REGEX
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enumeration used by the TDEStringMatcher class
|
||||||
|
* defining special handling of alphanumeric characters
|
||||||
|
*/
|
||||||
|
enum class ANCHandling: uchar
|
||||||
|
{
|
||||||
|
CASE_SENSITIVE = 0, // No handling, each character distinct
|
||||||
|
CASE_INSENSITIVE = 1, // Alphabetic case variants are same
|
||||||
|
EQUIVALENCE = 2, // Alphanumeric equivalents are same
|
||||||
|
DEFAULT = CASE_SENSITIVE
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Structure used by the TDEStringMatcher class
|
||||||
|
* representing properties of a single match specification.
|
||||||
|
*/
|
||||||
|
struct MatchSpec
|
||||||
|
{
|
||||||
|
PatternType patternType;
|
||||||
|
ANCHandling ancHandling;
|
||||||
|
bool expectMatch; // "matching" vs. "not matching"
|
||||||
|
TQString pattern;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Container used in a TDEStringMatcher object
|
||||||
|
* representing multiple match specifications.
|
||||||
|
*/
|
||||||
|
typedef TQValueVector<MatchSpec> MatchSpecList;
|
||||||
|
|
||||||
|
// Use horizontal tab as m_patternString separator
|
||||||
|
inline constexpr char PatterStringDivider { '\t' };
|
||||||
|
|
||||||
|
} // End of namespace TSM
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generic string matcher class.
|
||||||
|
*/
|
||||||
|
class TDECORE_EXPORT TDEStringMatcher : public TQObject
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
public:
|
||||||
|
|
||||||
|
TDEStringMatcher();
|
||||||
|
~TDEStringMatcher();
|
||||||
|
|
||||||
|
/**
|
||||||
|
@return list of currently defined match specifications.
|
||||||
|
*/
|
||||||
|
const TSM::MatchSpecList getMatchSpecs() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
@return string encoding list of currently defined match specifications.
|
||||||
|
*/
|
||||||
|
const TQString getMatchSpecString() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
Use @param newMatchSpecList to generate the internal list of match
|
||||||
|
specifications to be used for pattern matching.
|
||||||
|
*/
|
||||||
|
bool setMatchSpecs( TSM::MatchSpecList newMatchSpecList );
|
||||||
|
|
||||||
|
/**
|
||||||
|
Use specially encoded @param newPatternString to generate the internal
|
||||||
|
list of match specifications to be used for pattern matching. Refer
|
||||||
|
to file README.tdestringmatcher in tdelibs/tdecore source code for
|
||||||
|
more information on how the input string should be formatted.
|
||||||
|
*/
|
||||||
|
bool setMatchSpecs( TQString newMatchSpecString );
|
||||||
|
|
||||||
|
/**
|
||||||
|
@return whether or not @param stringToMatch matches any of
|
||||||
|
the current match specifications.
|
||||||
|
*/
|
||||||
|
bool matchAny( const TQString& stringToMatch ) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
@return whether or not @param stringToMatch matches all of
|
||||||
|
the current match specifications.
|
||||||
|
*/
|
||||||
|
bool matchAll( const TQString& stringToMatch ) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
@return a basic regular expression formed by converting the basic
|
||||||
|
wildcard expression in @param wildcardPattern.
|
||||||
|
*/
|
||||||
|
TQString wildcardToRegex( const TQString& wildcardPattern );
|
||||||
|
|
||||||
|
/**
|
||||||
|
@return a string that is @param basicString with all special regular
|
||||||
|
expression characters escaped. Useful for regular expression engines
|
||||||
|
that do not support /Q.../E.
|
||||||
|
*/
|
||||||
|
TQString escapeRegexChars( const TQString& basicString );
|
||||||
|
|
||||||
|
|
||||||
|
signals:
|
||||||
|
|
||||||
|
void patternsChanged();
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
class TDEStringMatcherPrivate;
|
||||||
|
TDEStringMatcherPrivate *d;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,241 @@
|
|||||||
|
// Build-time switches for this file.
//
// REGEXP_IS_PCRE2 guards the PCRE2-specific regex handling further down and
// is switched off here. The macro name must be the one that the #ifdef
// checks use; the previous '#undef REGEX_IS_PCRE2' named a different macro
// and therefore switched nothing off.
#undef REGEXP_IS_PCRE2
#define OPTIMIZE_ASCII_LOOKUP

#ifdef REGEXP_IS_PCRE2
#pragma message "############ Assuming regular expressions are PCRE2 ############"
#endif

#ifdef OPTIMIZE_ASCII_LOOKUP
#pragma message "############ ASCII characters will be processed separately ############"
#endif
|
||||||
|
|
||||||
|
#include "tequivchars.h"
|
||||||
|
|
||||||
|
//typedef wchar_t CHAR16;
|
||||||
|
//typedef unsigned short CHAR16;
|
||||||
|
typedef TQChar CHAR16;
|
||||||
|
|
||||||
|
/*
  Private data of TEquivChars: the table of character equivalents and the
  number of rows it contains.
*/
class TEquivChars_Private
{
  public:

    // One table row: a character and the character it collates to
    struct defaultCollation {
        CHAR16 character;
        CHAR16 collatesTo;
    };

    // The declarator is completed by the include file, which supplies the
    // rest of the declaration including its terminating ';'.
    // NOTE(review): the include file is not visible here; presumably it
    // provides the array bound and the initializer list - confirm.
    const defaultCollation EquivalentsTable
    #include "tequivchars-mapping.h"

    // Number of rows in EquivalentsTable
    uint EquivTableROWS = sizeof(EquivalentsTable) / sizeof(defaultCollation);
};
|
||||||
|
|
||||||
|
// Allocates the private data object, which holds the table of character
// equivalents.
TEquivChars::TEquivChars()
{
    p = new TEquivChars_Private;
}
|
||||||
|
|
||||||
|
// Releases the private data object allocated by the constructor.
TEquivChars::~TEquivChars()
{
    delete p;
}
|
||||||
|
|
||||||
|
/**
 * Returns a copy of inputString, of the same length, in which each character
 * is replaced by its equivalent:
 *   - with OPTIMIZE_ASCII_LOOKUP, ASCII 'A'..'Z' become 'a'..'z' and every
 *     other ASCII character is kept as-is;
 *   - any other character is looked up in the private equivalence table and
 *     replaced by its 'collatesTo' entry when found, otherwise kept as-is.
 *
 * When isRegex is true the string is scanned as a regular expression and the
 * following are copied through unconverted:
 *   - a backslash and the character it escapes;
 *   - the '[' that opens a character class, a '^' right after it, a ']' or
 *     ':' in first position of the class, and the ']' that closes it;
 *   - everything inside a POSIX bracket expression [:___:];
 *   - (REGEXP_IS_PCRE2 only) \Q___\E literals, \N{} \p{} \P{} \g{} brace
 *     expressions, (*___) directives and (?<___> group names.
 *
 * @param inputString  string (or regular expression) to convert
 * @param isRegex      treat inputString as a regular expression
 * @return the converted copy
 */
TQString TEquivChars::replaceChars( const TQString &inputString, bool isRegex )
{
    int inStrLen = inputString.length();
    TQString outString = TQString::fromLatin1( "" );
    outString.reserve( inStrLen );
    const TQChar *char16 = inputString.unicode();

    bool backSlashed = false;           // \_
    bool startedCharClass = false;      // Previous character was starting '[' of character class
    bool inCharacterClass = false;      // [___]
    bool inPosixBracketExpr = false;    // [:___:]
#ifdef REGEXP_IS_PCRE2
    bool quoteLiteral = false;          // \Q___\E
    bool inBraceExpr = false;           // \c{___} where 'c' is any of: 'p' 'P' 'N' 'g'
    bool inDirective = false;           // (*___)
    bool inGroupName = false;           // (?<___>
#endif // REGEXP_IS_PCRE2
    CHAR16 currChar = 0;
    CHAR16 prevChar = 0;
    CHAR16 nextChar = 0;

    // The increment expression stores currChar into the output on EVERY pass,
    // including those ended by 'continue'. A 'continue' before the conversion
    // code therefore means "copy this character through unchanged".
    for ( int i = 0 ; i < inStrLen ; outString[i] = CHAR16(currChar), i++ ) {

        prevChar = currChar;
        currChar = char16[i].unicode();

        if ( isRegex ) {

            /*
                Look for regex characters and character sequences
                that should never be converted to an equivalent.
            */

            if ( i < ( inStrLen - 1 ) )
                nextChar = char16[i+1].unicode();
            else
                nextChar = 0;

            // Don't convert backSlashed characters. This test comes before the
            // test for a new backslash so that an escaped backslash ("\\")
            // consumes the escape instead of extending it to the next character.
            if ( backSlashed ) {
#ifdef REGEXP_IS_PCRE2
                // Inside \Q___\E a backslash is literal, but this one may
                // still be the start of the terminating \E: stay armed.
                if ( quoteLiteral && currChar == '\\' )
                    continue;

                switch (currChar) {
                    case 'Q' : quoteLiteral = true;  break; // Entering literal \Q___\E
                    case 'E' : quoteLiteral = false; break; // Leaving literal \Q___\E
                    case 'N' : // Entering Unicode codepoint specification \N{U+___} ?
                    case 'P' : // Entering (negated) Unicode property specification \P{} ?
                    case 'p' : // Entering Unicode property specification \p{} ?
                    case 'g' : // Entering a named backreference \g{___} ?
                        if ( nextChar == '{' ) inBraceExpr = true;
                        break;
                }
#endif // REGEXP_IS_PCRE2
                backSlashed = false;
                continue;
            }

            if ( currChar == '\\' ) {
                backSlashed = true;
                continue;
            }

#ifdef REGEXP_IS_PCRE2
            if ( quoteLiteral )
                continue;

            if ( inBraceExpr ) {
                // Is it time to leave brace expression {___} ?
                if ( nextChar == '}' ) inBraceExpr = false;
                continue;
            }
#endif // REGEXP_IS_PCRE2

            if ( startedCharClass ) {
                switch (currChar) {
                    case '^' : // Negated character class, proceed to next character
                        continue; // Bypass converting this special character
                    case ']' : // Treat as part of character class, not as a closure
                    case ':' : // Treat as part of character class, not as start of bracket expression
                        startedCharClass = false;
                        continue; // Bypass converting these special characters
                }
                startedCharClass = false;
            } // startedCharClass

            if ( inCharacterClass ) {

                if ( inPosixBracketExpr ) {
                    // Is it time to leave POSIX bracket expression [:___:] ?
                    // Leave on the ']' of the closing ":]" so that this ']' is
                    // not mistaken for the end of the enclosing character class.
                    if ( currChar == ']' && prevChar == ':' ) inPosixBracketExpr = false;
                    continue;
                } // inPosixBracketExpr

                else { // ! inPosixBracketExpr

                    if ( prevChar == '[' && currChar == ':' ) {
                        // Enter POSIX bracket expression [:___:]
                        inPosixBracketExpr = true;
                        continue;
                    }

                    if ( currChar == ']' ) {
                        // Leaving character class [___]
                        inCharacterClass = false;
                        continue;
                    }

                } // ! inPosixBracketExpr

            } // inCharacterClass

            else { // ! inCharacterClass

                switch (currChar) {

                    case '[' :
                        // Entering a character class [___]
                        startedCharClass = true;
                        inCharacterClass = true;
                        continue;
#ifdef REGEXP_IS_PCRE2
                    case '*' :
                        if ( prevChar != '(' ) continue;
                        // Entering a PCRE2 directive (*___)
                        inDirective = true;
                        continue;

                    case '?' :
                        if ( prevChar != '(' ) continue;
                        if ( nextChar != '<' ) continue;
                        // Entering PCRE2 group name (?<___>)
                        inGroupName = true;
                        continue;
#endif // REGEXP_IS_PCRE2
                }
#ifdef REGEXP_IS_PCRE2
                if ( inDirective ) {
                    // Is it time to leave PCRE2 directive (*___) ?
                    if (currChar == ')' ) inDirective = false;
                    continue;
                }

                if ( inGroupName ) {
                    // Is it time to leave PCRE2 group name (?<___>) ?
                    if (currChar == '>' ) inGroupName = false;
                    continue;
                }
#endif // REGEXP_IS_PCRE2
            } // ! inCharacterClass

            /*
                If we have reached here, this regex character is a
                candidate for potential conversion to an equivalent.
            */

        } // isRegex

#ifdef OPTIMIZE_ASCII_LOOKUP
        // We can process ASCII quickly without using lookup table
        unsigned short codepoint = currChar.unicode();
        if ( codepoint < 128 ) {
            if ( codepoint > 64 && codepoint < 91 ) // convert upper case ASCII
                currChar = TQChar(codepoint + 32 ); // to corresponding lower case
            // All other ASCII characters are equivalent to themselves
            continue;
        }
#endif

        // Use a simple binary search to look up an equivalent character.
        // NOTE(review): this relies on the table being sorted by 'character'
        // in ascending order -- confirm against tequivchars-mapping.h.
        int low = 0;
        int high = p->EquivTableROWS - 1;
        while (low <= high) {
            int mid = low + (high - low) / 2;
            if ( currChar == p->EquivalentsTable[mid].character ) {
                // Found equivalent character, use it instead
                currChar = p->EquivalentsTable[mid].collatesTo;
                break;
            }
            if ( p->EquivalentsTable[mid].character < currChar )
                low = mid + 1;
            else
                high = mid - 1;
        }

        /* FIXME: Possible ideas for optimizing table lookup speed
            (1) Detect & handle ASCII (<128) characters separately. *DONE*
            (2) Split table into multiple lookup tables and search each
                in order of descending likelihood of character match.
        */

    }

    return outString;
}
|
@ -0,0 +1,34 @@
|
|||||||
|
#ifndef TEQUIVCHARS_H
|
||||||
|
#define TEQUIVCHARS_H
|
||||||
|
|
||||||
|
#include "tdelibs_export.h"
|
||||||
|
|
||||||
|
#include <tqstring.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class representing a mapping of each alphanumeric character to its "collating
|
||||||
|
* equivalent" as defined by the Default Unicode Collation Entity Table (DUCET).
|
||||||
|
* The mapping is limited to single-codepoint characters <= U+FFFF.
|
||||||
|
*/
|
||||||
|
class TDECORE_EXPORT TEquivChars
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TEquivChars();
|
||||||
|
~TEquivChars();
|
||||||
|
|
||||||
|
/**
|
||||||
|
@return copy of @param inputString modified such that each alphanumeric
|
||||||
|
character is replaced with it's collating character equivalent. If the
|
||||||
|
value @param isRegex is true, the input string is treated as a regular
|
||||||
|
expression and the alphabetical characters inside Posix bracket [::]
|
||||||
|
expressions are left as-is
|
||||||
|
*/
|
||||||
|
TQString replaceChars( const TQString &inputString, bool isRegex = false );
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
class TEquivChars_Private *p;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // TEQUIVCHARS_H
|
Loading…
Reference in new issue