#include "tdestringmatcher.h"

// NOTE(review): the names of the two angle-bracket headers and the template
// argument of the typedef below were lost when this file was extracted.
// They are reconstructed from what the code uses (TQRegExp, trace streams);
// confirm against the build before committing.
#include <tqregexp.h>
#include <kdebug.h>

typedef TQValueVector<TQRegExp> RegexList;

//================================================================================================
// File-local helpers
//================================================================================================

// Returns a copy of basicString in which every character that has a special
// meaning in a regular expression is preceded by a backslash, so that the
// result, used as a regex, matches basicString literally.
static TQString escapeRegexChars( const TQString& basicString )
{
    const int   length = basicString.length();
    const TQChar *chars = basicString.unicode();
    TQString outputString = TQString::fromLatin1( "" );

    for ( int i = 0 ; i < length ; i++ ) {
        const TQChar c = chars[i];
        switch ( c.unicode() ) {
            case '+': case '.': case '^': case '(': case ')':
            case '[': case ']': case '{': case '}': case '|':
            case '$': case '?': case '*': case '\\':
                outputString += TQChar( '\\' );
                outputString += c;
                break;
            default:
                outputString += c;
        }
    }
    return outputString;
}

//================================================================================================
// Private data
//================================================================================================

class TDEStringMatcher::TDEStringMatcherPrivate
{
public:
    // Properties that may be set / accessed through the TSM interface
    TQString      m_matchSpecString;
    MatchSpecList m_matchSpecList;

    // Implementation-only property: one TQRegExp per entry of m_matchSpecList
    // (same index). For pattern types that do not need the regex engine
    // (SUBSTRING) the TQRegExp is not used for matching; its pattern() field
    // is merely "borrowed" to hold a converted (escaped) copy of the pattern.
    RegexList     m_regexList;

    // Tests candidate against the match specification stored at index.
    // Returns true when the outcome (pattern found / not found) is the one
    // the specification asks for via wantMatch. Unknown pattern types are
    // never satisfied.
    bool specSatisfied( size_t index, const TQString& candidate ) const
    {
        const MatchSpec &spec = m_matchSpecList[index];
        bool found = false;

        switch ( spec.patternType ) {
            case PatternType::REGEX :
            case PatternType::WILDCARD :
                found = ( m_regexList[index].search( candidate ) >= 0 );
                break;

            case PatternType::SUBSTRING : {
                // Anything other than CASE_SENSITIVE is searched case-insensitively.
                const bool cs = ( spec.ancHandling == ANCHandling::CASE_SENSITIVE );
                found = ( candidate.find( spec.pattern, 0, cs ) >= 0 );
                break;
            }

            default:
                return false;
        }
        return found == spec.wantMatch;
    }
};

//================================================================================================
// Construction / destruction
//================================================================================================

TDEStringMatcher::TDEStringMatcher()
{
    TSMTRACE << "TDEStringMatcher::TDEStringMatcher: New instance created: " << this << endl;
    p = new TDEStringMatcherPrivate;
}

TDEStringMatcher::~TDEStringMatcher()
{
    p->m_matchSpecList.clear();
    p->m_regexList.clear();
    delete p;
    TSMTRACE << "TDEStringMatcher::~TDEStringMatcher: Instance destroyed: " << this << endl;
}

//================================================================================================
// Match specification output functions
//================================================================================================

// Returns the current match specifications in their string form.
TQString TDEStringMatcher::getMatchSpecString()
{
    return p->m_matchSpecString;
}

// Returns the current match specifications in their list form.
MatchSpecList TDEStringMatcher::getMatchSpecs()
{
    return p->m_matchSpecList;
}

//================================================================================================
// Match specification input functions
//================================================================================================

// Replaces the stored match specifications with newMatchSpecList.
//
// Every entry is validated first; if any entry has an empty pattern, a
// pattern containing the separator character, an out-of-range pattern type or
// character handling value, or an invalid regex, nothing is changed and false
// is returned. On success the list, the per-entry regexes and the equivalent
// specification string are updated, patternsChanged() is emitted and true is
// returned.
bool TDEStringMatcher::setMatchSpecs( MatchSpecList newMatchSpecList )
{
    RegexList   newRegexList;
    TQStringList newMatchSpecs;
    TQString    optionString = "rc" ; // start with defaults
    TQRegExp    rxWork;

    TSMTRACE << "TDEStringMatcher::setMatchSpecs: validating match specification list" << endl;

    for ( const MatchSpec &matchSpec : newMatchSpecList ) {

        if ( matchSpec.pattern.isEmpty() ) {
            TSMTRACE << " Error: empty pattern!" << endl;
            return false;
        }

        if ( matchSpec.pattern.find( TQChar(SEP) ) >= 0 ) {
            TSMTRACE << " Error: pattern contains reserved separator character" << endl;
            return false;
        }

        switch ( matchSpec.patternType ) {
            // The following pattern types will be using TQRegExp functions for matching
            case PatternType::REGEX :
                optionString += TQChar('r');
                rxWork.setPattern( matchSpec.pattern );
                break;
            case PatternType::WILDCARD :
                optionString += TQChar('w');
                rxWork.setPattern( wildcardToRegex( matchSpec.pattern ) );
                break;

            // The following pattern types will be using TQString functions for matching
            case PatternType::SUBSTRING :
                optionString += TQChar('s');
                // We only "borrow" the regex for storage. Escape the text so
                // that a literal such as "a(" is not rejected by isValid().
                rxWork.setPattern( escapeRegexChars( matchSpec.pattern ) );
                break;

            default:
                TSMTRACE << " Error: pattern type out of range" << endl;
                return false;
        }

        switch ( matchSpec.ancHandling ) {
            case ANCHandling::CASE_SENSITIVE :
                optionString += TQChar('c');
                rxWork.setCaseSensitive( true );
                break;
            case ANCHandling::CASE_INSENSITIVE :
                optionString += TQChar('i');
                rxWork.setCaseSensitive( false );
                break;
            case ANCHandling::EQUIVALENCE :
                optionString += TQChar('e');
                // NOTE(review): the string overload of setMatchSpecs() uses
                // setCaseSensitive( false ) for EQUIVALENCE; one of the two is
                // presumably wrong. Left unchanged pending a decision.
                rxWork.setCaseSensitive( true );
                // FIXME TBD: This is where we will be converting each (unescaped)
                // alphanumeric character in rxWork.pattern to its "least" equivalent.
                break;
            default:
                TSMTRACE << " Error: alphabetic character handling specification out of range" << endl;
                return false;
        }

        optionString += TQChar( matchSpec.wantMatch ? '=' : '!' );

        if ( ! rxWork.isValid() ) {
            TSMTRACE << " Error: invalid pattern syntax'" << endl;
            return false;
        }

        // This particular match specification is good
        newMatchSpecs.append( optionString );
        newMatchSpecs.append( matchSpec.pattern );
        newRegexList.append( rxWork );
        optionString = "";
    }

    // All proposed match specifications are good, update everything accordingly
    p->m_matchSpecList   = newMatchSpecList;
    p->m_regexList       = newRegexList;
    p->m_matchSpecString = newMatchSpecs.join( TQChar(SEP) );
    emit patternsChanged();
    return true;
}

//=================================================================================================

// Replaces the stored match specifications with those parsed from
// newMatchSpecString.
//
// Expected format: option string, pattern string, option string, pattern
// string, ... separated by SEP. Option characters (case-insensitive):
//   r / w / s   pattern type: regex / wildcard / substring
//   c / i / e   character handling: case sensitive / insensitive / equivalence
//   = / !       succeed on match / succeed on non-match
// Options not mentioned in an option string are inherited from the previous
// specification. An empty string clears all specifications.
//
// Returns true on success (including "unchanged"); returns false and leaves
// the stored specifications untouched when the string is malformed.
bool TDEStringMatcher::setMatchSpecs( TQString newMatchSpecString )
{
    MatchSpecList newMatchSpecList;
    RegexList     newRegexList;
    TQRegExp      rxWork; // single working copy == each pattern inherits previous options
    MatchSpec     matchSpec = {
        PatternType::DEFAULT,
        ANCHandling::DEFAULT,
        true, // seeking matches, not non-matches
        ""
    };

    if ( newMatchSpecString == p->m_matchSpecString )
        return true;

    TSMTRACE << "TDEStringMatcher::setMatchSpecs: Proposed match specification string: <" << newMatchSpecString << ">" << endl;

    if ( newMatchSpecString.isEmpty() ) {
        TSMTRACE << " Empty pattern string => match specifications will be cleared" << endl;
        p->m_matchSpecList.clear();
        p->m_regexList.clear();
        p->m_matchSpecString = "";
        emit patternsChanged();
        return true;
    }

    TQStringList newMatchSpecs = TQStringList::split( SEP, newMatchSpecString, true );

    if ( newMatchSpecs.count() % 2 != 0 ) {
        TSMTRACE << " Error: match specification string must contain an even number of components" << endl;
        return false;
    }

    TSMTRACE << newMatchSpecs.count() << endl;

    bool processingPattern = false; // expected format: option string , pattern string, ...

    for ( const TQString &specification : newMatchSpecs ) {

        if ( specification.find( TQChar(SEP) ) >= 0 ) {
            TSMTRACE << " Error: match specification string contains reserved separator character" << endl;
            return false;
        }

        if ( processingPattern ) {
            TSMTRACE << " Processing match pattern string: '" << specification << "'" << endl;

            if ( specification.isEmpty() ) {
                TSMTRACE << " Error: empty patterns are not allowed" << endl;
                return false;
            }

            // Prepare regex
            switch ( matchSpec.patternType ) {
                // The following pattern types will be using TQRegExp functions for matching
                case PatternType::REGEX :
                    rxWork.setPattern( specification );
                    break;
                case PatternType::WILDCARD :
                    rxWork.setPattern( wildcardToRegex( specification ) );
                    break;

                // The following pattern types will be using TQString functions for matching
                case PatternType::SUBSTRING :
                    // Used for storage only; escaped so isValid() cannot
                    // reject a literal that happens to be a malformed regex.
                    rxWork.setPattern( escapeRegexChars( specification ) );
                    break;

                default:
                    // Should not arise. Fail rather than skip: skipping would
                    // leave the option/pattern alternation out of step.
                    return false;
            }

            switch ( matchSpec.ancHandling ) {
                case ANCHandling::CASE_SENSITIVE :
                    rxWork.setCaseSensitive( true );
                    break;
                case ANCHandling::CASE_INSENSITIVE :
                    rxWork.setCaseSensitive( false );
                    break;
                case ANCHandling::EQUIVALENCE :
                    // NOTE(review): the list overload of setMatchSpecs() uses
                    // setCaseSensitive( true ) for EQUIVALENCE; one of the two
                    // is presumably wrong. Left unchanged pending a decision.
                    rxWork.setCaseSensitive( false );
                    // FIXME TBD: This is where we will be converting each (unescaped)
                    // alphanumeric character in rxWork.pattern to its "least" equivalent.
                    break;
                default:
                    return false; // should not arise, see above
            }

            // Test regex
            if ( ! rxWork.isValid() ) {
                TSMTRACE << " Error: invalid pattern syntax'" << endl;
                return false;
            }

            TSMTRACE << " Final Wildcard/CaseSensitive settings: " << rxWork.wildcard() << "/" << rxWork.caseSensitive() << endl;

            matchSpec.pattern = specification;
            newMatchSpecList.push_back( matchSpec );
            newRegexList.append( rxWork );
            processingPattern = false; // next spec should be an option string
            continue;
        }

        const TQString options = specification.lower();
        TSMTRACE << " Processing match option string: '" << options << "'" << endl;

        const int optionCount = options.length();
        for ( int i = 0 ; i < optionCount ; i++ ) {
            const TQChar optionChar = options[i];
            TSMTRACE << " Option character: '" << optionChar << "'" << endl;

            // latin1() yields 0 for characters outside Latin-1, which falls
            // through to the error branch like any other unknown option.
            switch ( optionChar.latin1() ) {
                case 'r' : matchSpec.patternType = PatternType::REGEX ; break;
                case 'w' : matchSpec.patternType = PatternType::WILDCARD ; break;
                case 's' : matchSpec.patternType = PatternType::SUBSTRING ; break;
                case 'c' : matchSpec.ancHandling = ANCHandling::CASE_SENSITIVE ; break;
                case 'i' : matchSpec.ancHandling = ANCHandling::CASE_INSENSITIVE; break;
                case 'e' : matchSpec.ancHandling = ANCHandling::EQUIVALENCE ; break;
                case '=' : matchSpec.wantMatch = true ; break;
                case '!' : matchSpec.wantMatch = false ; break;
                default:
                    // We reserve ALL other possible option characters for future use!
                    TSMTRACE << " Error: invalid option character" << endl;
                    return false;
            }
        }
        processingPattern = true; // next spec should be a pattern string
    }

    p->m_matchSpecList   = newMatchSpecList;
    p->m_regexList       = newRegexList;
    p->m_matchSpecString = newMatchSpecString;

    TSMTRACE << " Final patternString: '" << p->m_matchSpecString << "'" << endl;
    TSMTRACE << " Number of regex match patterns in list: '" << p->m_regexList.count() << "'" << endl;
    TSMTRACE << " Notifying slots of pattern change" << endl;
    emit patternsChanged();
    TSMTRACE << " All slots have been notified" << endl;
    TSMTRACE << "TDEStringMatcher::setMatchSpecs: Patterns were successfully regenerated" << endl << endl;
    return true;
}

//================================================================================================
// Match functions
//================================================================================================

// Returns true as soon as stringToMatch satisfies at least one stored match
// specification (pattern found when a match is wanted, or pattern absent when
// a non-match is wanted). Returns false when no specification is satisfied
// or when no specifications are stored.
bool TDEStringMatcher::matchAny( const TQString& stringToMatch )
{
    TSMTRACE << "Attempting to match string '" << stringToMatch << "' against stored patterns" << endl;

    if ( p->m_matchSpecList.isEmpty() ) {
        //-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
        return false; //FIXME: or should that be true per MicheleC's comment?
    }

    TQString equivalentString; // computed lazily, at most once per call

    for ( size_t index = 0 ; index < p->m_matchSpecList.count() ; index++ ) {
        TQString matchThis = stringToMatch;

        if ( p->m_matchSpecList[index].ancHandling == ANCHandling::EQUIVALENCE ) {
            if ( equivalentString.isNull() ) {
                // FIXME TBD: This is where we will be converting each alphanumeric
                // character in stringToMatch to its "least" equivalent and storing
                // the result in equivalentString. Until then, we'll just do:
                equivalentString = stringToMatch;
            }
            matchThis = equivalentString;
        }

        if ( p->specSatisfied( index, matchThis ) ) {
            if ( p->m_matchSpecList[index].patternType == PatternType::SUBSTRING ) {
                TSMTRACE << "Match succeeded with substring: '" << p->m_matchSpecList[index].pattern << "'" << endl;
            }
            else {
                TSMTRACE << "Match succeeded with regex pattern: '" << p->m_regexList[index].pattern() << "'" << endl;
            }
            return true;
        }
    }

    //-Debug: TSMTRACE << "Match failed, no pattern matched!" << endl;
    return false;
}

// Returns true only when stringToMatch satisfies every stored match
// specification (pattern found when a match is wanted, pattern absent when a
// non-match is wanted). Returns false on the first unsatisfied specification
// or when no specifications are stored.
bool TDEStringMatcher::matchAll( const TQString& stringToMatch )
{
    //-Debug: TSMTRACE << "Attempting to match string '" << stringToMatch << "' against stored patterns" << endl;

    if ( p->m_matchSpecList.isEmpty() ) {
        //-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
        return false; //FIXME: or should that be true per MicheleC's comment?
    }

    TQString equivalentString; // computed lazily, at most once per call

    for ( size_t index = 0 ; index < p->m_matchSpecList.count() ; index++ ) {
        TQString matchThis = stringToMatch;

        if ( p->m_matchSpecList[index].ancHandling == ANCHandling::EQUIVALENCE ) {
            if ( equivalentString.isNull() ) {
                // FIXME TBD: This is where we will be converting each alphanumeric
                // character in stringToMatch to its "least" equivalent and storing
                // the result in equivalentString. Until then, we'll just do:
                equivalentString = stringToMatch;
            }
            matchThis = equivalentString;
        }

        if ( ! p->specSatisfied( index, matchThis ) ) {
            //-Debug: TSMTRACE << "String failed to match pattern: '" << p->m_regexList[index].pattern() << "'" << endl;
            return false;
        }
    }

    //-Debug: TSMTRACE << "Match succeeded, all patterns matched!" << endl;
    return true;
}

//================================================================================================
// Utility functions
//================================================================================================

/*
    Converts a shell-style wildcard pattern into an equivalent regular
    expression that is anchored at both ends (wildcard patterns must match the
    entire string).

      *      becomes  .*
      ?      becomes  .
      [...]  is copied as a character class; a leading '!' (the POSIX negation
             character) becomes '^', a leading '^' is escaped so that it is
             taken literally, and a ']' in first position is taken literally.
      Other regex metacharacters are escaped.

    The following code is a modified copy of that found in
    tqt3/src/tools/qregexp.cpp.
*/
TQString TDEStringMatcher::wildcardToRegex( const TQString& wildcardPattern )
{
    const int    wclen = wildcardPattern.length();
    const TQChar *wc   = wildcardPattern.unicode();
    TQString rx = TQString::fromLatin1( "" );
    int i = 0;

    while ( i < wclen ) {
        const TQChar c = wc[i++];
        switch ( c.unicode() ) {
            case '*':
                rx += TQString::fromLatin1( ".*" );
                break;
            case '?':
                rx += TQChar( '.' );
                break;
            case '$': case '(': case ')': case '+': case '.':
            case '\\': case '^': case '{': case '|': case '}':
                rx += TQChar( '\\' );
                rx += c;
                break;
            case '[':
                rx += c;
                // Guard: '[' may be the last character of the pattern.
                if ( i < wclen ) {
                    if ( wc[i] == TQChar('!') ) {
                        rx += TQChar('^');
                        i++;
                    }
                    else if ( wc[i] == TQChar('^') ) {
                        rx += TQChar( '\\' );
                        rx += wc[i++];
                    }
                }
                if ( i < wclen ) {
                    // A ']' right after the (optional) negation is a literal
                    // member of the class. The test must look at the input
                    // (wc), not at the output built so far.
                    if ( wc[i] == TQChar(']') )
                        rx += wc[i++];
                    while ( i < wclen && wc[i] != TQChar(']') ) {
                        if ( wc[i] == TQChar('\\') )
                            rx += TQChar( '\\' );
                        rx += wc[i++];
                    }
                }
                break;
            default:
                rx += c;
        }
    }

    /* Wildcard patterns must match entire string */
    return TQChar('^') + rx + TQChar('$');

    /* TBD: Add support for extglob */
}

//================================================================================================

#include "tdestringmatcher.moc"