You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
515 lines
16 KiB
C++
515 lines
16 KiB
C++
#include "tdestringmatcher.h"
|
|
|
|
#include <tqregexp.h>
|
|
#include <kdebug.h>
|
|
|
|
// List of compiled patterns, kept index-aligned with the match specifications.
using RegexList = TQValueVector<TQRegExp>;
|
|
|
|
/**
 * Private data of a TDEStringMatcher instance.
 */
class TDEStringMatcher::TDEStringMatcherPrivate {
public:

  //
  // State that can be set / read through the TDEStringMatcher interface
  //

  // Match specifications in their string form (see getMatchSpecString()).
  TQString m_matchSpecString;

  // Match specifications in their list form (see getMatchSpecs()).
  MatchSpecList m_matchSpecList;

  //
  // State used by the implementation only
  //

  // One TQRegExp per entry of m_matchSpecList, stored at the same index.
  //
  // The TQRegExp object is not necessarily used for matching: when a
  // PatternType does not require a regex engine, its pattern() field may
  // merely be "borrowed" to store a "converted" version of the pattern.
  RegexList m_regexList;
};
|
|
|
|
/**
 * Creates a matcher with no match specifications and allocates its private
 * data.
 */
TDEStringMatcher::TDEStringMatcher()
  : p( new TDEStringMatcherPrivate )
{
  TSMTRACE << "TDEStringMatcher::TDEStringMatcher: New instance created: " << this << endl;
}
|
|
|
|
/**
 * Releases the private data of this matcher.
 */
TDEStringMatcher::~TDEStringMatcher()
{
  // Deleting the private object destroys the containers it holds, so they do
  // not have to be cleared beforehand.
  delete p;
  TSMTRACE << "TDEStringMatcher::~TDEStringMatcher: Instance destroyed: " << this << endl;
}
|
|
|
|
//================================================================================================
|
|
// Match specification output functions
|
|
//================================================================================================
|
|
|
|
/**
 * @return the currently stored match specifications in their string form
 */
TQString TDEStringMatcher::getMatchSpecString()
{
  const TQString &currentSpecString = p->m_matchSpecString;
  return currentSpecString;
}
|
|
|
|
/**
 * @return the currently stored match specifications in their list form
 */
MatchSpecList TDEStringMatcher::getMatchSpecs()
{
  const MatchSpecList &currentSpecs = p->m_matchSpecList;
  return currentSpecs;
}
|
|
|
|
|
|
//================================================================================================
|
|
// Match specification input functions
|
|
//================================================================================================
|
|
|
|
bool TDEStringMatcher::setMatchSpecs( MatchSpecList newMatchSpecList )
|
|
{
|
|
|
|
RegexList newRegexList;
|
|
|
|
TQString optionString = "rc" ; // start with defaults
|
|
TQStringList newMatchSpecs;
|
|
|
|
TQRegExp rxWork;
|
|
|
|
TSMTRACE << "TDEStringMatcher::setPatterns: validating match specification list" << endl;
|
|
|
|
for ( MatchSpec matchSpec : newMatchSpecList ) {
|
|
|
|
if ( matchSpec.pattern.isEmpty() ) {
|
|
TSMTRACE << " Error: empty pattern!" << endl;
|
|
newRegexList.clear();
|
|
return false;
|
|
}
|
|
if ( matchSpec.pattern.find( TQChar(SEP) ) >= 0 ) {
|
|
TSMTRACE << " Error: pattern contains reserved separator character" << endl;
|
|
newRegexList.clear();
|
|
return false;
|
|
}
|
|
|
|
switch ( matchSpec.patternType ) {
|
|
|
|
// The following pattern types will be using TQRegExp functions for matching
|
|
case PatternType::REGEX :
|
|
optionString += TQChar('r');
|
|
rxWork.setPattern( matchSpec.pattern );
|
|
break;
|
|
case PatternType::WILDCARD :
|
|
optionString += TQChar('w');
|
|
rxWork.setPattern( wildcardToRegex( matchSpec.pattern ) );
|
|
break;
|
|
|
|
// The following pattern types will be using TQString functions for matching
|
|
case PatternType::SUBSTRING :
|
|
optionString += TQChar('s');
|
|
rxWork.setPattern( matchSpec.pattern ); // we will "borrow" this field
|
|
break;
|
|
|
|
default:
|
|
newRegexList.clear();
|
|
TSMTRACE << " Error: pattern type out of range" << endl;
|
|
return false;
|
|
}
|
|
|
|
switch ( matchSpec.ancHandling ) {
|
|
|
|
case ANCHandling::CASE_SENSITIVE :
|
|
optionString += TQChar('c');
|
|
rxWork.setCaseSensitive( true );
|
|
break;
|
|
case ANCHandling::CASE_INSENSITIVE :
|
|
optionString += TQChar('i');
|
|
rxWork.setCaseSensitive( false );
|
|
break;
|
|
case ANCHandling::EQUIVALENCE :
|
|
optionString += TQChar('e');
|
|
rxWork.setCaseSensitive( true );
|
|
// FIXME TBD: This is where we will be converting each (unescaped)
|
|
// alphanumeric character in rxWork.pattern to its "least" equivalent.
|
|
break;
|
|
default:
|
|
newRegexList.clear();
|
|
TSMTRACE << " Error: alphabetic character handling specification out of range" << endl;
|
|
return false;
|
|
}
|
|
|
|
if ( matchSpec.wantMatch )
|
|
optionString += TQChar('=');
|
|
else
|
|
optionString += TQChar('!');
|
|
|
|
if (! rxWork.isValid() ) {
|
|
TSMTRACE << " Error: invalid pattern syntax'" << endl;
|
|
newRegexList.clear();
|
|
return false;
|
|
}
|
|
|
|
// This particular match specification is good
|
|
|
|
newMatchSpecs.append( optionString );
|
|
newMatchSpecs.append( matchSpec.pattern );
|
|
newRegexList.append( rxWork );
|
|
optionString = "";
|
|
}
|
|
|
|
// All proposed match specifications are good, update everything accordingly
|
|
|
|
p->m_matchSpecList.clear(); p->m_matchSpecList = newMatchSpecList;
|
|
p->m_regexList.clear(); p->m_regexList = newRegexList;
|
|
p->m_matchSpecString = newMatchSpecs.join( TQChar(SEP) );
|
|
emit patternsChanged();
|
|
|
|
return true;
|
|
}
|
|
|
|
//=================================================================================================
|
|
|
|
bool TDEStringMatcher::setMatchSpecs( TQString newMatchSpecString )
|
|
{
|
|
MatchSpecList newMatchSpecList;
|
|
RegexList newRegexList;
|
|
|
|
TQRegExp rxWork; // single working copy == each pattern inherits previous options
|
|
|
|
MatchSpec matchSpec = {
|
|
PatternType::DEFAULT,
|
|
ANCHandling::DEFAULT,
|
|
true, // seeking matches, not non-matches
|
|
""
|
|
};
|
|
|
|
if ( newMatchSpecString == p->m_matchSpecString )
|
|
return true;
|
|
TSMTRACE << "TDEStringMatcher::setPatterns: Proposed match specification string: <" << newMatchSpecString << ">" << endl;
|
|
|
|
if ( newMatchSpecString.isEmpty() ) {
|
|
TSMTRACE << " Empty pattern string => match specifications will be cleared" << endl;
|
|
p->m_matchSpecList.clear();
|
|
p->m_regexList.clear();
|
|
p->m_matchSpecString = "";
|
|
emit patternsChanged();
|
|
return true;
|
|
}
|
|
|
|
TQStringList newMatchSpecs = TQStringList::split( SEP, newMatchSpecString, true );
|
|
|
|
if ( newMatchSpecs.count() % 2 != 0 ) {
|
|
TSMTRACE << " Error: match specification string must contain an even number of components" << endl;
|
|
return false;
|
|
}
|
|
TSMTRACE << newMatchSpecs.count() << endl;
|
|
|
|
bool processingPattern = false; // expected format: option string , pattern string, ...
|
|
|
|
for ( TQString &specification : newMatchSpecs ) {
|
|
|
|
if ( specification.find( TQChar(SEP) ) >= 0 ) {
|
|
TSMTRACE << " Error: match specification string contains reserved separator character" << endl;
|
|
newMatchSpecList.clear();
|
|
newRegexList.clear();
|
|
return false;
|
|
}
|
|
|
|
if ( processingPattern ) {
|
|
TSMTRACE << " Processing match pattern string: '" << specification << "'" << endl;
|
|
|
|
if ( specification.isEmpty() ) {
|
|
TSMTRACE << " Error: empty patterns are not allowed" << endl;
|
|
newMatchSpecList.clear();
|
|
newRegexList.clear();
|
|
return false;
|
|
}
|
|
|
|
// Prepare regex
|
|
|
|
switch ( matchSpec.patternType ) {
|
|
|
|
// The following pattern types will be using TQRegExp functions for matching
|
|
case PatternType::REGEX :
|
|
rxWork.setPattern( specification );
|
|
break;
|
|
case PatternType::WILDCARD :
|
|
rxWork.setPattern( wildcardToRegex( specification ) );
|
|
break;
|
|
|
|
// The following pattern types will be using TQString functions for matching
|
|
case PatternType::SUBSTRING :
|
|
rxWork.setPattern( specification ); // used for storage only
|
|
break;
|
|
|
|
default:
|
|
continue; // should not arise
|
|
}
|
|
|
|
switch ( matchSpec.ancHandling ) {
|
|
case ANCHandling::CASE_SENSITIVE :
|
|
rxWork.setCaseSensitive( true );
|
|
break;
|
|
case ANCHandling::CASE_INSENSITIVE :
|
|
rxWork.setCaseSensitive( false );
|
|
break;
|
|
case ANCHandling::EQUIVALENCE :
|
|
rxWork.setCaseSensitive( false );
|
|
// FIXME TBD: This is where we will be converting each (unescaped)
|
|
// alphanumeric character in rxWork.pattern to its "least" equivalent.
|
|
break;
|
|
default:
|
|
continue; // should not arise
|
|
}
|
|
|
|
// Test regex
|
|
|
|
if (! rxWork.isValid() ) {
|
|
TSMTRACE << " Error: invalid pattern syntax'" << endl;
|
|
newMatchSpecList.clear();
|
|
newRegexList.clear();
|
|
return false;
|
|
continue;
|
|
}
|
|
|
|
// if (! rxWork.isReallyWhatUserIntended() ) { HA HA
|
|
|
|
TSMTRACE << " Final Wildcard/CaseSensitive settings: " << rxWork.wildcard() << "/" << rxWork.caseSensitive() << endl;
|
|
|
|
matchSpec.pattern = specification;
|
|
newMatchSpecList.push_back( matchSpec );
|
|
newRegexList.append( rxWork );
|
|
processingPattern = false; // next spec should be an option string
|
|
continue;
|
|
}
|
|
|
|
specification = specification.lower();
|
|
TSMTRACE << " Processing match option string: '" << specification << "'" << endl;
|
|
for ( int i = 0 ; i < specification.length() ; i++ ) {
|
|
TQChar optionChar = specification[i];
|
|
TSMTRACE << " Option character: '" << optionChar << "'" << endl;
|
|
|
|
switch ( optionChar ) {
|
|
case 'r' : matchSpec.patternType = PatternType::REGEX ; break;
|
|
case 'w' : matchSpec.patternType = PatternType::WILDCARD ; break;
|
|
case 's' : matchSpec.patternType = PatternType::SUBSTRING ; break;
|
|
case 'c' : matchSpec.ancHandling = ANCHandling::CASE_SENSITIVE ; break;
|
|
case 'i' : matchSpec.ancHandling = ANCHandling::CASE_INSENSITIVE; break;
|
|
case 'e' : matchSpec.ancHandling = ANCHandling::EQUIVALENCE ; break;
|
|
case '=' : matchSpec.wantMatch = true ; break;
|
|
case '!' : matchSpec.wantMatch = false ; break;
|
|
default:
|
|
// We reserve ALL other possible option characters for future use!
|
|
TSMTRACE << " Error: invalid option character" << endl;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
processingPattern = true; // next spec should be a pattern string
|
|
}
|
|
|
|
p->m_matchSpecList.clear(); p->m_matchSpecList = newMatchSpecList;
|
|
p->m_regexList.clear(); p->m_regexList = newRegexList;
|
|
p->m_matchSpecString = newMatchSpecString;
|
|
|
|
//newRegexList.clear(); // no need to do this?
|
|
|
|
TSMTRACE << " Final patternString: '" << p->m_matchSpecString << "'" << endl;
|
|
TSMTRACE << " Number of regex match patterns in list: '" << p->m_regexList.count() << "'" << endl;
|
|
TSMTRACE << " Notifying slots of pattern change" << endl;
|
|
emit patternsChanged();
|
|
TSMTRACE << " All slots have been notified" << endl;
|
|
TSMTRACE << "TDEStringMatcher::setPatterns: Patterns were successfully regenerated" << endl << endl;
|
|
return true;
|
|
}
|
|
|
|
//================================================================================================
|
|
// Match functions
|
|
//================================================================================================
|
|
|
|
bool TDEStringMatcher::matchAny( const TQString& stringToMatch )
|
|
{
|
|
TSMTRACE << "Attempting to match string '" << stringToMatch << "' against stored patterns" << endl;
|
|
if ( p->m_matchSpecList.isEmpty() ) {
|
|
//-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
|
|
return false; //FIXME: or should that be true per MicheleC's comment?
|
|
}
|
|
|
|
TQString equivalentString;
|
|
|
|
for ( size_t index = 0 ; index < p->m_matchSpecList.count() ; index++ )
|
|
{
|
|
TQString matchThis = stringToMatch;
|
|
if ( p->m_matchSpecList[index].ancHandling == ANCHandling::EQUIVALENCE )
|
|
{
|
|
if ( equivalentString.isNull() ) {
|
|
// FIXME TBD: This is where we will be converting each alphanumeric
|
|
// character in stringToMatch to its "least" equivalent and storing
|
|
// the result in equivalentString. Until then, we'll just do:
|
|
equivalentString = stringToMatch;
|
|
}
|
|
matchThis = equivalentString;
|
|
}
|
|
|
|
switch ( p->m_matchSpecList[index].patternType ) {
|
|
|
|
case PatternType::REGEX :
|
|
case PatternType::WILDCARD :
|
|
if (
|
|
( p->m_regexList[index].search( matchThis ) >= 0 ) // was there a match?
|
|
== p->m_matchSpecList[index].wantMatch // is that what we were looking for?
|
|
) {
|
|
TSMTRACE << "Match succeeded with regex pattern: '" << p->m_regexList[index].pattern() << "'" << endl;
|
|
return true;
|
|
}
|
|
break;
|
|
|
|
case PatternType::SUBSTRING :
|
|
bool cs = ! (bool) p->m_matchSpecList[index].ancHandling;
|
|
if (
|
|
( matchThis.find( p->m_matchSpecList[index].pattern, 0, cs ) >= 0 ) // was there a match?
|
|
== p->m_matchSpecList[index].wantMatch // is that what we were looking for?
|
|
) {
|
|
TSMTRACE << "Match succeeded with substring: '" << p->m_matchSpecList[index].pattern << "'" << endl;
|
|
return true;
|
|
}
|
|
break;
|
|
}
|
|
|
|
}
|
|
//-Debug: TSMTRACE << "Match failed, no pattern matched!" << endl;
|
|
return false ;
|
|
}
|
|
|
|
bool TDEStringMatcher::matchAll( const TQString& stringToMatch )
|
|
{
|
|
//-Debug: TSMTRACE << "Attempting to match string '" << stringToMatch << "' against stored patterns" << endl;
|
|
if ( p->m_matchSpecList.isEmpty() ) {
|
|
//-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
|
|
return false; //FIXME: or should that be true per MicheleC's comment?
|
|
}
|
|
|
|
TQString equivalentString;
|
|
|
|
for ( size_t index = 0 ; index < p->m_matchSpecList.count() ; index++ )
|
|
{
|
|
TQString matchThis = stringToMatch;
|
|
if ( p->m_matchSpecList[index].ancHandling == ANCHandling::EQUIVALENCE )
|
|
{
|
|
if ( equivalentString.isNull() ) {
|
|
// FIXME TBD: This is where we will be converting each alphanumeric
|
|
// character in stringToMatch to its "least" equivalent and storing
|
|
// the result in equivalentString. Until then, we'll just do:
|
|
equivalentString = stringToMatch;
|
|
}
|
|
matchThis = equivalentString;
|
|
}
|
|
|
|
if (
|
|
( p->m_regexList[index].search( matchThis ) < 0 ) // was there no match?
|
|
!= p->m_matchSpecList[index].wantMatch // is that what we were looking for?
|
|
) {
|
|
//-Debug: TSMTRACE << "String fail3ed to matching pattern: '" << rxPattern->pattern() << "'" << endl;
|
|
return false;
|
|
}
|
|
|
|
if ( p->m_regexList[index].search( matchThis ) < 0 ) {
|
|
//-Debug: TSMTRACE << "String failed to match pattern: '" << rxPattern->pattern() << "'" << endl;
|
|
return false;
|
|
}
|
|
}
|
|
//-Debug: TSMTRACE << "Match succeeded, all patterns matched!" << endl;
|
|
return true;
|
|
}
|
|
|
|
//================================================================================================
|
|
// Utility functions
|
|
//================================================================================================
|
|
|
|
/*
|
|
The following code is a modified copy of that found in tqt3/src/tools/qregexp.cpp.
|
|
*/
|
|
/**
 * Converts a wildcard (glob) pattern into an equivalent regular expression
 * that is anchored at both ends.
 *
 *   - '*' becomes ".*" and '?' becomes "."
 *   - regex metacharacters outside of a set are escaped with a backslash
 *   - "[...]" sets are copied; a leading '!' negates the set (written as '^'
 *     in the regex), a leading '^' is taken literally, and a ']' directly
 *     after the opening (and optional negation) is a member of the set
 *
 * @param wildcardPattern  the wildcard pattern to convert
 * @return the regular expression, wrapped in '^' ... '$'
 */
TQString TDEStringMatcher::wildcardToRegex( const TQString& wildcardPattern )
{
  const int wclen = wildcardPattern.length();
  const TQChar *wc = wildcardPattern.unicode();
  TQString rx = TQString::fromLatin1( "" );
  int i = 0;

  while ( i < wclen ) {
    const TQChar c = wc[i++];
    switch ( c.unicode() ) {
      case '*':
        rx += TQString::fromLatin1( ".*" );
        break;
      case '?':
        rx += TQChar( '.' );
        break;
      case '$':
      case '(':
      case ')':
      case '+':
      case '.':
      case '\\':
      case '^':
      case '{':
      case '|':
      case '}':
        rx += TQChar( '\\' );
        rx += c;
        break;
      case '[':
        rx += c;
        // Only look ahead if there is a character after the '[': the pattern
        // may end right here.
        if ( i < wclen ) {
          if ( wc[i] == TQChar('!') ) {
            // POSIX states that the negation character is '!'
            rx += TQChar( '^' );
            i++;
          } else if ( wc[i] == TQChar('^') ) {
            // ... so a leading '^' is an ordinary member of the set
            rx += TQChar( '\\' );
            rx += wc[i++];
          }
        }
        if ( i < wclen ) {
          // A ']' in first position belongs to the set instead of closing it.
          // It must be looked up in the input (wc), not in the output (rx).
          if ( wc[i] == TQChar(']') )
            rx += wc[i++];
          while ( i < wclen && wc[i] != TQChar(']') ) {
            if ( wc[i] == TQChar('\\') )
              rx += TQChar( '\\' );
            rx += wc[i++];
          }
          // The closing ']' itself is copied by the next iteration of the
          // outer loop.
        }
        break;
      default:
        rx += c;
        break;
    }
  }

  /* TBD: Add support for extglob */

  /* Wildcard patterns must match entire string */
  return TQChar('^') + rx + TQChar('$');
}
|
|
|
|
/**
 * Returns a copy of @p basicString in which every character listed in the
 * switch below is preceded by a backslash; all other characters are copied
 * unchanged.
 */
static TQString escapeRegexChars( const TQString& basicString )
{
  const TQChar *chars = basicString.unicode();
  const int charCount = basicString.length();
  TQString escaped = TQString::fromLatin1( "" );

  for ( int pos = 0 ; pos < charCount ; ++pos ) {
    const TQChar current = chars[pos];

    bool needsEscape = false;
    switch ( current.unicode() ) {
      case '+': case '.': case '^':
      case '(': case ')':
      case '[': case ']':
      case '{': case '}':
      case '|': case '$':
      case '?': case '*':
      case '\\':
        needsEscape = true;
        break;
      default:
        break;
    }

    if ( needsEscape )
      escaped += TQChar( '\\' );
    escaped += current;
  }

  return escaped;
}
|
|
|
|
//================================================================================================
|
|
|
|
#include "tdestringmatcher.moc"
|