You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdelibs/tdecore/tdestringmatcher.cpp

654 lines
21 KiB

#include "tdestringmatcher.h"
#include "tequivchars.h"
#include <tdeglobal.h>
#include <tqregexp.h>
#include <kdebug.h>
#if __has_include( <features.h> ) // C++17
#pragma message "Using features.h to check for __GLIBC__"
#include <features.h>
#endif
#ifdef __GLIBC__
#include <fnmatch.h>
#pragma message "TSM using GLIBC fnmatch() for wildcard matching"
#endif
//================================================================================================
class AuxData
{
public:
AuxData();
TQString patternConverted; // Pattern converted from original (e.g ANCHandling::EQUIVALENCE)
TQRegExp* matchEngine; // Used when PatternType::REGEX
#ifdef __GLIBC__
int fnmatchFlags; // Used by fnmatch() when PatternType::WILDCARD
#endif
bool isCaseSensitive; // PatternType::SUBSTRING
};
AuxData::AuxData()
{
isCaseSensitive = true;
#ifdef __GLIBC__
fnmatchFlags = FNM_EXTMATCH; // Bash shell option 'extglob'
#endif
matchEngine = nullptr;
patternConverted = TQString::null;
}
//================================================================================================
// One AuxData entry is appended for every accepted match specification.
using AuxDataList = TQValueVector<AuxData>;

// Private state of TDEStringMatcher.
class TDEStringMatcher::TDEStringMatcherPrivate {
public:
    // Resets every member and deletes any regex engines referenced by m_auxData.
    void clearAll();

    // Properties that may be set / accessed through the TSM interface
    TQString      m_matchSpecString; // match specifications in string form
    MatchSpecList m_matchSpecList;   // match specifications in list form

    // Properties that are internal implementation only
    AuxDataList   m_auxData;         // derived data, indexed like m_matchSpecList by the matching functions
};
void TDEStringMatcher::TDEStringMatcherPrivate::clearAll()
{
m_matchSpecString = TQString::null;
m_matchSpecList.clear();
for ( size_t index = 0 ; index < m_auxData.count() ; index++ ) {
if ( m_auxData[index].matchEngine != nullptr ) {
TSMTRACE << "Freeing regex match engine " << m_auxData[index].matchEngine << endl;
delete m_auxData[index].matchEngine;
}
}
m_auxData.clear();
}
//================================================================================================
// Creates a matcher with an empty set of match specifications.
TDEStringMatcher::TDEStringMatcher()
    : d( new TDEStringMatcherPrivate )
{
    TSMTRACE << "TSM::TDEStringMatcher(): New instance created: " << this << endl;
}
// Destroys the matcher together with its private data.
TDEStringMatcher::~TDEStringMatcher()
{
// clearAll() deletes the regex engines referenced from the auxiliary data;
// the private class declares no destructor of its own that would do so.
d->clearAll();
delete d;
TSMTRACE << "TSM::~TDEStringMatcher(): Instance destroyed: " << this << endl;
}
//================================================================================================
// Match specification output functions
//================================================================================================
// Returns a copy of the currently stored match specification string.
const TQString TDEStringMatcher::getMatchSpecString() const
{
return d->m_matchSpecString;
}
// Returns a copy of the currently stored match specification list.
const TDEStringMatcher::MatchSpecList TDEStringMatcher::getMatchSpecs() const
{
return d->m_matchSpecList;
}
//================================================================================================
// Match specification input functions
//================================================================================================
// Replaces the stored match specifications with those in newMatchSpecList.
//
// Every specification is validated first. On the first invalid one the function
// returns false and the currently stored specifications are left untouched,
// because all work is done in a local TDEStringMatcherPrivate until the very end.
// On success the equivalent specification string is rebuilt as alternating
// "option string" / "pattern" components joined by PatternStringDivider, the
// patternsChanged() signal is emitted and true is returned.
//
// Option string characters generated here:
//   w / r / s  - wildcard / regex / substring pattern type
//   c / i / e  - case sensitive / case insensitive / equivalence handling
//   = / !      - a match / a non-match is expected
bool TDEStringMatcher::setMatchSpecs( MatchSpecList newMatchSpecList )
{
// Everything is assembled in workArea and only copied into d once all
// specifications have been accepted.
TDEStringMatcherPrivate workArea;
TQStringList newMatchSpecs;
TSMTRACE << "TSM::setPatterns(): validating match specification list" << endl;
for ( MatchSpec matchSpec : newMatchSpecList ) {
if ( matchSpec.pattern.isEmpty() ) {
TSMTRACE << " Error: empty pattern!" << endl;
// Frees regex engines already allocated for earlier specifications
workArea.clearAll();
return false;
}
// The divider separates components of the specification string, so a
// pattern containing it could not be represented there.
if ( matchSpec.pattern.find( TQChar(PatternStringDivider) ) >= 0 ) {
TSMTRACE << " Error: pattern contains reserved separator character" << endl;
workArea.clearAll();
return false;
}
AuxData auxWork;
TQString inferredOptionString;
// Validate / process PatternType
auxWork.patternConverted = matchSpec.pattern;
switch ( matchSpec.patternType ) {
case PatternType::WILDCARD :
inferredOptionString += TQChar('w');
#ifndef __GLIBC__
// Without GLIBC fnmatch() the wildcard is matched as a regex instead
auxWork.patternConverted = wildcardToRegex( auxWork.patternConverted );
TSMTRACE << " Converted wildcard expression '" << matchSpec.pattern << "' to regex '" << auxWork.patternConverted << "'" << endl;
#endif
break;
case PatternType::REGEX :
inferredOptionString += TQChar('r');
break;
case PatternType::SUBSTRING :
inferredOptionString += TQChar('s');
break;
default:
TSMTRACE << " Error: pattern type out of range" << endl;
workArea.clearAll();
return false;
}
// Validate / process ANCHandling
// 'before' is only used for the trace message in the EQUIVALENCE case
TQString before = auxWork.patternConverted;
switch ( matchSpec.ancHandling ) {
case ANCHandling::CASE_SENSITIVE :
inferredOptionString += TQChar('c');
auxWork.isCaseSensitive = true;
break;
case ANCHandling::CASE_INSENSITIVE :
inferredOptionString += TQChar('i');
auxWork.isCaseSensitive = false;
#ifdef __GLIBC__
auxWork.fnmatchFlags |= FNM_CASEFOLD;
#endif
break;
case ANCHandling::EQUIVALENCE :
inferredOptionString += TQChar('e');
// The pattern itself is rewritten, so matching stays case sensitive
auxWork.isCaseSensitive = true;
auxWork.patternConverted = TEquivChars::replaceChars( auxWork.patternConverted, true );
TSMTRACE << " Converted match pattern '" << before << "' to equivalent '" << auxWork.patternConverted << "'" << endl;
break;
default:
TSMTRACE << " Error: alphabetic character handling specification out of range" << endl;
workArea.clearAll();
return false;
}
if ( matchSpec.expectMatch )
inferredOptionString += TQChar('=');
else
inferredOptionString += TQChar('!');
// Test validity of pattern
// (PatternType::SUBSTRING has no case below: it needs no validation)
TQRegExp rxWork;
int result;
switch ( matchSpec.patternType ) {
case PatternType::WILDCARD :
#ifdef __GLIBC__
// Test wildcard expression using a subject matter expert
// The pattern is matched against itself purely to provoke an error return.
result = fnmatch(
auxWork.patternConverted.local8Bit().data(),
auxWork.patternConverted.local8Bit().data(),
auxWork.fnmatchFlags
); // Whether it matched is irrelevant; only an error return matters
switch ( result ) {
case 0: // matched
case FNM_NOMATCH: // not matched
break;
default:
TSMTRACE << " Error: invalid wildcard syntax" << endl;
workArea.clearAll();
return false;
}
break;
#else
// Wildcard expression was converted to regex during earlier PatternType
// processing and will be subsequently validated as such.
// (deliberate fall through into the REGEX case)
#endif
case PatternType::REGEX :
// Prepare regex
rxWork.setPattern( auxWork.patternConverted );
rxWork.setCaseSensitive( auxWork.isCaseSensitive );
// Test regex
if ( rxWork.isValid() ) {
// The engine is heap allocated; it is released by clearAll()
auxWork.matchEngine = new TQRegExp;
*auxWork.matchEngine = rxWork;
TSMTRACE << "AuxData: Allocated regex engine for matching '" << auxWork.matchEngine->pattern() << "'" << endl;
}
else {
TSMTRACE << " Error: invalid regex syntax'" << endl;
workArea.clearAll();
return false;
}
break;
// if (! rxWork.isReallyWhatUserIntended() ) { HA HA
}
// This particular match specification is good
newMatchSpecs.append( inferredOptionString );
newMatchSpecs.append( matchSpec.pattern );
workArea.m_auxData.append( auxWork );
}
// All proposed match specifications are good, update everything accordingly
workArea.m_matchSpecList = newMatchSpecList;
workArea.m_matchSpecString = newMatchSpecs.join( TQChar(PatternStringDivider) );
// Release the previous engines, then take over the new data. The copy is
// memberwise, so the engine pointers now belong to d; workArea goes out of
// scope without clearAll() being called on it.
d->clearAll();
*d = workArea;
//-Debug: TSMTRACE << " Notifying slots of pattern change" << endl;
emit patternsChanged();
//-Debug: TSMTRACE << " All slots have been notified" << endl;
TSMTRACE << "TSM::setPatterns(): Patterns were successfully regenerated from list" << endl << endl;
return true;
}
//=================================================================================================
// Replaces the stored match specifications with those encoded in
// newMatchSpecString.
//
// The string consists of components separated by PatternStringDivider that
// alternate between an option string and a pattern:
//   option string , pattern , option string , pattern , ...
// Option characters (case insensitive): r/w/s select regex / wildcard /
// substring, c/i/e select case sensitive / case insensitive / equivalence
// handling, '=' expects a match and '!' expects a non-match. Options that an
// option string does not mention keep the value set by an earlier option
// string (or the initial defaults), because a single MatchSpec is reused for
// all pairs.
//
// Returns true when the string equals the stored one (nothing is done), when
// it is empty (all specifications are cleared) or when every component was
// accepted. Returns false, leaving the stored specifications untouched, when
// the number of components is odd, an option character is unknown, a pattern
// is empty or a pattern has invalid syntax. patternsChanged() is emitted
// whenever the stored specifications were replaced or cleared.
bool TDEStringMatcher::setMatchSpecs( TQString newMatchSpecString )
{
    if ( newMatchSpecString == d->m_matchSpecString )
        return true;

    // Everything is assembled in workArea and only copied into d once all
    // components have been accepted.
    TDEStringMatcherPrivate workArea;
    MatchSpec matchSpec = {
        PatternType::DEFAULT,
        ANCHandling::DEFAULT,
        true, // seeking matches, not non-matches
        ""
    };

    TSMTRACE << "TSM::setPatterns: Proposed match specification string: <" << newMatchSpecString << ">" << endl;

    if ( newMatchSpecString.isEmpty() ) {
        TSMTRACE << " Empty pattern string => match specifications will be cleared" << endl;
        // clearAll() also releases the auxiliary data and its regex engines,
        // which would otherwise linger until the next update.
        d->clearAll();
        // clearAll() leaves a null string; store an empty one as before
        d->m_matchSpecString = "";
        emit patternsChanged();
        return true;
    }

    TQStringList newMatchSpecs = TQStringList::split( PatternStringDivider, newMatchSpecString, true );
    if ( newMatchSpecs.count() % 2 != 0 ) {
        TSMTRACE << " Error: match specification string must contain an even number of components" << endl;
        return false;
    }

    bool processingOptionString = true; // expected format: option string , pattern string, ...
    for ( TQString &specification : newMatchSpecs ) {
        if ( processingOptionString ) {
            specification = specification.lower();
            TSMTRACE << " Processing match option string: '" << specification << "'" << endl;
            for ( int i = 0 ; i < specification.length() ; i++ ) {
                TQChar optionChar = specification[i];
                //Debug: TSMTRACE << " Option character: '" << optionChar << "'" << endl;
                switch ( optionChar ) {
                    case 'r' : matchSpec.patternType = PatternType::REGEX ; break;
                    case 'w' : matchSpec.patternType = PatternType::WILDCARD ; break;
                    case 's' : matchSpec.patternType = PatternType::SUBSTRING ; break;
                    case 'c' : matchSpec.ancHandling = ANCHandling::CASE_SENSITIVE ; break;
                    case 'i' : matchSpec.ancHandling = ANCHandling::CASE_INSENSITIVE; break;
                    case 'e' : matchSpec.ancHandling = ANCHandling::EQUIVALENCE ; break;
                    case '=' : matchSpec.expectMatch = true ; break;
                    case '!' : matchSpec.expectMatch = false ; break;
                    default:
                        // We reserve ALL other possible option characters for future use!
                        TSMTRACE << " Error: invalid option character" << endl;
                        workArea.clearAll();
                        return false;
                }
            }
            processingOptionString = false; // next spec should be a pattern string
        } // processingOptionString
        else { // ! processingOptionString
            TSMTRACE << " Processing match pattern string: '" << specification << "'" << endl;
            if ( specification.isEmpty() ) {
                TSMTRACE << " Error: empty pattern!" << endl;
                // Frees regex engines already allocated for earlier patterns
                workArea.clearAll();
                return false;
            }

            AuxData auxWork;

            // Validate / process PatternType
            auxWork.patternConverted = specification;
            switch ( matchSpec.patternType ) {
                case PatternType::WILDCARD :
#ifndef __GLIBC__
                    // Without GLIBC fnmatch() the wildcard is matched as a regex instead
                    auxWork.patternConverted = wildcardToRegex( specification );
                    TSMTRACE << " Converted wildcard expression '" << specification << "' to regex '" << auxWork.patternConverted << "'" << endl;
                    break;
#endif
                case PatternType::REGEX :
                case PatternType::SUBSTRING :
                    break;
                default :
                    // This should never arise since the content of this field was set within this function
                    kdWarning() << "Error while processing '" << specification
                                << "' pattern type out of range: " << (uchar) matchSpec.patternType
                                << endl;
                    workArea.clearAll();
                    return false;
            }

            // Validate / process ANCHandling
            // 'before' is only used for the trace message in the EQUIVALENCE case
            TQString before = auxWork.patternConverted;
            switch ( matchSpec.ancHandling ) {
                case ANCHandling::CASE_SENSITIVE :
                    auxWork.isCaseSensitive = true;
                    break;
                case ANCHandling::CASE_INSENSITIVE :
                    auxWork.isCaseSensitive = false;
#ifdef __GLIBC__
                    auxWork.fnmatchFlags |= FNM_CASEFOLD;
#endif
                    break;
                case ANCHandling::EQUIVALENCE :
                    // The pattern itself is rewritten, so matching stays case sensitive
                    auxWork.isCaseSensitive = true;
                    auxWork.patternConverted = TEquivChars::replaceChars( auxWork.patternConverted, true );
                    TSMTRACE << " Converted match pattern '" << before << "' to equivalent '" << auxWork.patternConverted << "'" << endl;
                    break;
                default:
                    // Any other value is tolerated and leaves auxWork at its defaults.
                    // NOTE(review): a warning + "return false" used to follow this
                    // break but could never execute; confirm against the ANCHandling
                    // declaration whether rejecting (as the list overload does) is
                    // what is wanted here.
                    break;
            }

            // Test validity of pattern
            // (PatternType::SUBSTRING has no case below: it needs no validation)
            TQRegExp rxWork; // fresh working copy for every pattern
            int result;
            switch ( matchSpec.patternType ) {
                case PatternType::WILDCARD :
#ifdef __GLIBC__ // Test wildcard expression using a subject matter expert
                    // The pattern is matched against itself purely to provoke an
                    // error return; whether it matched is irrelevant.
                    result = fnmatch(
                        auxWork.patternConverted.local8Bit().data(),
                        auxWork.patternConverted.local8Bit().data(),
                        auxWork.fnmatchFlags
                    );
                    switch ( result ) {
                        case 0: // matched
                        case FNM_NOMATCH: // not matched
                            break;
                        default:
                            TSMTRACE << " Error: invalid wildcard syntax" << endl;
                            workArea.clearAll();
                            return false;
                    }
                    break;
#endif // Otherwise the wildcard was converted to a regex and is tested as such (fall through)
                case PatternType::REGEX :
                    // Prepare regex
                    rxWork.setPattern( auxWork.patternConverted );
                    rxWork.setCaseSensitive( auxWork.isCaseSensitive );
                    // Test regex
                    if ( rxWork.isValid() ) {
                        // The engine is heap allocated; it is released by clearAll()
                        auxWork.matchEngine = new TQRegExp;
                        *auxWork.matchEngine = rxWork;
                        TSMTRACE << " AuxData: Allocated regex engine " << auxWork.matchEngine << " for pattern: " << auxWork.matchEngine->pattern() << endl;
                    }
                    else {
                        TSMTRACE << " Error: invalid regex syntax" << endl;
                        workArea.clearAll();
                        return false;
                    }
                    break;
                    // if (! rxWork.isReallyWhatUserIntended() ) { HA HA
            }

            matchSpec.pattern = specification;
            workArea.m_matchSpecList.push_back( matchSpec );
            workArea.m_auxData.append( auxWork );
            processingOptionString = true; // next spec should be an option string
        } // ! processingOptionString completed
    }

    workArea.m_matchSpecString = newMatchSpecString;
    // Release the previous engines, then take over the new data. The copy is
    // memberwise, so the engine pointers now belong to d; workArea goes out of
    // scope without clearAll() being called on it.
    d->clearAll();
    *d = workArea;
    TSMTRACE << " Final patternString: '" << d->m_matchSpecString << "'" << endl;
    TSMTRACE << " Number of match patterns in list: '" << d->m_matchSpecList.count() << "'" << endl;
    //-Debug: TSMTRACE << " Notifying slots of pattern change" << endl;
    emit patternsChanged();
    //-Debug: TSMTRACE << " All slots have been notified" << endl;
    TSMTRACE << "TSM::setPatterns(): Patterns were successfully regenerated from string" << endl << endl;
    return true;
}
//================================================================================================
// Match functions
//================================================================================================
bool TDEStringMatcher::matchAny( const TQString& stringToMatch ) const
{
/* DEBUG
TSMTRACE << "TSM:matchAny(): Attempting to match string '" << stringToMatch << "' against stored patterns" << endl;
if ( d->m_matchSpecList.isEmpty() ) {
//-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
return false;
}
*/
TQString equivalentString;
for ( size_t index = 0 ; index < d->m_matchSpecList.count() ; index++ )
{
TQString matchWhat = stringToMatch;
TQString matchThis = d->m_auxData[index].patternConverted;
if ( d->m_matchSpecList[index].ancHandling == ANCHandling::EQUIVALENCE ) {
if ( equivalentString.isEmpty() ) {
//TBR equivalentString = TDEGlobal::equivChars()->replaceChars( stringToMatch, false ) ;
equivalentString = TEquivChars::replaceChars( stringToMatch, false ) ;
}
matchWhat = equivalentString;
}
bool matchFound = false;
switch ( d->m_matchSpecList[index].patternType ) {
case PatternType::WILDCARD :
#ifdef __GLIBC__
matchFound = ( fnmatch(
matchThis.local8Bit().data(),
matchWhat.local8Bit().data(),
d->m_auxData[index].fnmatchFlags
) == 0 );
break;
#endif
case PatternType::REGEX :
matchFound = ( d->m_auxData[index].matchEngine->search( matchWhat ) >= 0 );
break;
case PatternType::SUBSTRING :
matchFound = ( matchWhat.find( matchThis, 0, d->m_auxData[index].isCaseSensitive ) >= 0 );
break;
}
if ( matchFound == d->m_matchSpecList[index].expectMatch ) {
TSMTRACE << " Success! match of pattern '" << matchThis << "' against '" << matchWhat << "' turned out as expected" << endl;
return true;
}
}
TSMTRACE << " Match failed, there were no pattern matches against '" << stringToMatch << "' that turned out as expected" << endl;
return false ;
}
bool TDEStringMatcher::matchAll( const TQString& stringToMatch ) const
{
TSMTRACE << "TSM:matchAll(): Attempting to match string '" << stringToMatch << "' against stored patterns" << endl;
if ( d->m_matchSpecList.isEmpty() ) {
//-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
return false;
}
TQString equivalentString;
for ( size_t index = 0 ; index < d->m_matchSpecList.count() ; index++ )
{
TQString matchWhat = stringToMatch;
TQString matchThis = d->m_auxData[index].patternConverted;
if ( d->m_matchSpecList[index].ancHandling == ANCHandling::EQUIVALENCE ) {
if ( equivalentString.isEmpty() ) {
//TBR equivalentString = TDEGlobal::equivChars()->replaceChars( stringToMatch, false ) ;
equivalentString = TEquivChars::replaceChars( stringToMatch, false ) ;
}
matchWhat = equivalentString;
}
bool matchFound = false;
switch ( d->m_matchSpecList[index].patternType ) {
case PatternType::WILDCARD :
#ifdef __GLIBC__
matchFound = ( fnmatch(
matchThis.local8Bit().data(),
matchWhat.local8Bit().data(),
d->m_auxData[index].fnmatchFlags
) == 0 );
break;
#endif
case PatternType::REGEX :
matchFound = ( d->m_auxData[index].matchEngine->search( matchWhat ) >= 0 );
break;
case PatternType::SUBSTRING :
matchFound = ( matchWhat.find( matchThis, 0, d->m_auxData[index].isCaseSensitive ) >= 0 );
break;
}
if ( matchFound != d->m_matchSpecList[index].expectMatch ) {
TSMTRACE << " Match of pattern '" << matchThis << "' against '" << matchWhat << "' did not turn out as expected" << endl;
return false;
}
}
TSMTRACE << " Expected pattern matching succeeded" << endl;
return true;
}
//================================================================================================
// Utility functions
//================================================================================================
/*
The following code is a modified copy of that found in tqt3/src/tools/qregexp.cpp.
We export this as utility function for applications that wish to convert a basic
wildcard expression into a basic regular expression. TSM will not use this unless
GLIBC fnmatch() is not available.
*/
// Converts a basic wildcard (glob) expression into a regular expression that
// is anchored at both ends, so it has to match the entire subject string.
//
//   '*'          becomes ".*"
//   '?'          becomes "."
//   $ ( ) + . \ ^ { | }  are escaped with a backslash
//   '[' ... ']'  is copied as a character class, where a leading '!' becomes
//                the regex negation '^', a leading '^' is escaped so it stays
//                literal, a ']' directly after that is taken literally and
//                backslashes inside the class are doubled
//   anything else is copied unchanged
TQString TDEStringMatcher::wildcardToRegex( const TQString& wildcardPattern )
{
    const int wclen = wildcardPattern.length();
    const TQChar *wc = wildcardPattern.unicode();
    TQString rx = TQString::fromLatin1( "" );
    int i = 0;

    while ( i < wclen ) {
        TQChar c = wc[i++];
        switch ( c.unicode() ) {
            case '*':
                rx += TQString::fromLatin1( ".*" );
                break;
            case '?':
                rx += TQChar( '.' );
                break;
            case '$':
            case '(':
            case ')':
            case '+':
            case '.':
            case '\\':
            case '^':
            case '{':
            case '|':
            case '}':
                rx += TQChar( '\\' );
                rx += c;
                break;
            case '[':
                rx += c;
                // POSIX states that the negation character is '!', not '^'.
                // The bounds checks matter when '[' is the last character of
                // the pattern: wc[i] must not be read past the end.
                if ( i < wclen && wc[i] == TQChar('!') ) {
                    rx += TQChar('^');
                    i++;
                } else if ( i < wclen && wc[i] == TQChar('^') ) {
                    rx += TQChar( '\\' );
                    rx += wc[i++];
                }
                if ( i < wclen ) {
                    // A ']' in first position is a literal member of the class.
                    // This has to inspect the input (wc); the output (rx) has a
                    // different length and must not be indexed with i.
                    if ( wc[i] == TQChar(']') )
                        rx += wc[i++];
                    // Copy up to, but not including, the closing ']'; that one
                    // is appended by the default case of the outer loop.
                    while ( i < wclen && wc[i] != TQChar(']') ) {
                        if ( wc[i] == '\\' )
                            rx += TQChar( '\\' );
                        rx += wc[i++];
                    }
                }
                break;
            default:
                rx += c;
        }
    }
    /* TBD: Add support for extglob */
    /* Wildcard patterns must match entire string */
    return TQChar('^') + rx + TQChar('$');
}
// Returns a copy of basicString in which each of the characters
// + . ^ ( ) [ ] { } | $ ? * and backslash is preceded by a backslash.
// All other characters are copied unchanged.
static TQString escapeRegexChars( const TQString& basicString )
{
    // Characters that receive a backslash prefix
    static const char regexSpecials[] = "+.^()[]{}|$?*\\";

    const int total = basicString.length();
    const TQChar *chars = basicString.unicode();
    TQString outputString = TQString::fromLatin1( "" );

    for ( int pos = 0 ; pos < total ; ++pos ) {
        TQChar current = chars[pos];
        const auto code = current.unicode();

        bool needsEscape = false;
        for ( const char *special = regexSpecials ; *special != '\0' ; ++special ) {
            if ( code == static_cast<unsigned char>( *special ) ) {
                needsEscape = true;
                break;
            }
        }

        if ( needsEscape )
            outputString += TQChar( '\\' );
        outputString += current;
    }
    return outputString;
}
//================================================================================================
#include "tdestringmatcher.moc"