@ -3,167 +3,512 @@
# include <tqregexp.h>
# include <tqregexp.h>
# include <kdebug.h>
# include <kdebug.h>
// List of compiled regular expressions; the code below keeps it index-aligned
// with the match specification list (one TQRegExp per match specification).
using RegexList = TQValueVector<TQRegExp>;
// Private data holder for TDEStringMatcher; an instance is created in the
// constructor and deleted in the destructor.
// NOTE(review): this span appears to interleave lines from two revisions of
// the class (duplicated header, both patternString and the m_* members);
// reconcile the two before relying on it.
class TDEStringMatcher : : TDEStringMatcherPrivate {
class TDEStringMatcher : : TDEStringMatcherPrivate {
public :
public :
TQString patternString ;
} ; / / FIXME : This may be too small to warrant a private class : \
// Properties that may be set / accessed through the TSM interface
TQString m_matchSpecString ;
MatchSpecList m_matchSpecList ;
// Properties used by the implementation only
RegexList m_regexList ;
/* An individual TQRegExp object is not used for matching when its
PatternType does not require a regex engine, but we may "borrow"
the TQRegExp.pattern() field to store a "converted" version of
the pattern.
*/
} ;
// Constructor: allocates the private data object and traces the creation of
// the new instance.
// NOTE(review): every line appears twice because two revisions of the
// function are interleaved here (they differ only in statement order);
// keep a single copy.
TDEStringMatcher : : TDEStringMatcher ( )
TDEStringMatcher : : TDEStringMatcher ( )
{
{
p = new TDEStringMatcherPrivate ;
TSMTRACE < < " TDEStringMatcher::TDEStringMatcher: New instance created: " < < this < < endl ;
TSMTRACE < < " TDEStringMatcher::TDEStringMatcher: New instance created: " < < this < < endl ;
p = new TDEStringMatcherPrivate ;
}
}
// Destructor: empties the stored lists, deletes the private data object and
// traces the destruction of the instance.
// NOTE(review): two revisions are interleaved here (the patternList lines vs.
// the p->m_* lines, and a doubled `delete p`); as written `delete p` would run
// twice -- keep only one revision's statements.
TDEStringMatcher : : ~ TDEStringMatcher ( )
TDEStringMatcher : : ~ TDEStringMatcher ( )
{
{
patternList . setAutoDelete ( true ) ;
p - > m_matchSpecList . clear ( ) ;
patternList . clear ( ) ;
p - > m_regex List. clear ( ) ;
delete p ;
delete p ;
TSMTRACE < < " TDEStringMatcher::TDEStringMatcher: Instance destroyed: " < < this < < endl ;
TSMTRACE < < " TDEStringMatcher::TDEStringMatcher: Instance destroyed: " < < this < < endl ;
}
}
// Returns the match specification string currently stored in the private
// data object.
// NOTE(review): this span interleaves the older getPatternString() (returning
// p->patternString) with getMatchSpecString() (returning p->m_matchSpecString);
// only one signature/return pair should remain.
TQString TDEStringMatcher : : getPatternString ( )
//================================================================================================
// Match specification output functions
//================================================================================================
TQString TDEStringMatcher : : getMatchSpecString ( )
{
{
return p - > patternString ;
return p - > m_matchSpecString ;
}
// Returns (by value) the list of match specifications currently held in the
// private data object.
MatchSpecList TDEStringMatcher::getMatchSpecs()
{
  const MatchSpecList &currentSpecs = p->m_matchSpecList;
  return currentSpecs;
}
//================================================================================================
// Match specification input functions
//================================================================================================
bool TDEStringMatcher : : setMatchSpecs ( MatchSpecList newMatchSpecList )
{
RegexList newRegexList ;
TQString optionString = " rc " ; // start with defaults
TQStringList newMatchSpecs ;
TQRegExp rxWork ;
TSMTRACE < < " TDEStringMatcher::setPatterns: validating match specification list " < < endl ;
for ( MatchSpec matchSpec : newMatchSpecList ) {
if ( matchSpec . pattern . isEmpty ( ) ) {
TSMTRACE < < " Error: empty pattern! " < < endl ;
newRegexList . clear ( ) ;
return false ;
}
if ( matchSpec . pattern . find ( TQChar ( SEP ) ) > = 0 ) {
TSMTRACE < < " Error: pattern contains reserved separator character " < < endl ;
newRegexList . clear ( ) ;
return false ;
}
switch ( matchSpec . patternType ) {
// The following pattern types will be using TQRegExp functions for matching
case PatternType : : REGEX :
optionString + = TQChar ( ' r ' ) ;
rxWork . setPattern ( matchSpec . pattern ) ;
break ;
case PatternType : : WILDCARD :
optionString + = TQChar ( ' w ' ) ;
rxWork . setPattern ( wildcardToRegex ( matchSpec . pattern ) ) ;
break ;
// The following pattern types will be using TQString functions for matching
case PatternType : : SUBSTRING :
optionString + = TQChar ( ' s ' ) ;
rxWork . setPattern ( matchSpec . pattern ) ; // we will "borrow" this field
break ;
default :
newRegexList . clear ( ) ;
TSMTRACE < < " Error: pattern type out of range " < < endl ;
return false ;
}
switch ( matchSpec . ancHandling ) {
case ANCHandling : : CASE_SENSITIVE :
optionString + = TQChar ( ' c ' ) ;
rxWork . setCaseSensitive ( true ) ;
break ;
case ANCHandling : : CASE_INSENSITIVE :
optionString + = TQChar ( ' i ' ) ;
rxWork . setCaseSensitive ( false ) ;
break ;
case ANCHandling : : EQUIVALENCE :
optionString + = TQChar ( ' e ' ) ;
rxWork . setCaseSensitive ( true ) ;
// FIXME TBD: This is where we will be converting each (unescaped)
// alphanumeric character in rxWork.pattern to its "least" equivalent.
break ;
default :
newRegexList . clear ( ) ;
TSMTRACE < < " Error: alphabetic character handling specification out of range " < < endl ;
return false ;
}
if ( matchSpec . wantMatch )
optionString + = TQChar ( ' = ' ) ;
else
optionString + = TQChar ( ' ! ' ) ;
if ( ! rxWork . isValid ( ) ) {
TSMTRACE < < " Error: invalid pattern syntax' " < < endl ;
newRegexList . clear ( ) ;
return false ;
}
// This particular match specification is good
newMatchSpecs . append ( optionString ) ;
newMatchSpecs . append ( matchSpec . pattern ) ;
newRegexList . append ( rxWork ) ;
optionString = " " ;
}
// All proposed match specifications are good, update everything accordingly
p - > m_matchSpecList . clear ( ) ; p - > m_matchSpecList = newMatchSpecList ;
p - > m_regexList . clear ( ) ; p - > m_regexList = newRegexList ;
p - > m_matchSpecString = newMatchSpecs . join ( TQChar ( SEP ) ) ;
emit patternsChanged ( ) ;
return true ;
}
}
// Parses a match specification string and, if it is valid, replaces the
// stored match specifications with the result.
//
// As far as the visible code shows: the string is split on SEP into
// alternating option strings and patterns. Option characters select the
// pattern type (r/w/s), the alphabetic character handling (c/i/e) and whether
// a match or a non-match is wanted (=/!); the working specification is kept
// across iterations, so each pattern inherits the options that preceded it.
// An empty input string clears all stored specifications. A string equal to
// the one already stored is accepted without further work.
//
// Returns true when the specifications were accepted (or unchanged), false on
// a malformed string, an unknown option character, an empty pattern or an
// invalid pattern.
//
// NOTE(review): this span interleaves the lines of two revisions of the
// function (generatePatternList( newPatternString ) and
// setMatchSpecs( newMatchSpecString )), so statements and braces do not pair
// up as written; reconcile the two before relying on it.
bool TDEStringMatcher : : generatePatternList ( TQString newPatternString )
//=================================================================================================
bool TDEStringMatcher : : setMatchSpecs ( TQString newMatchSpecString )
{
{
if ( newPatternString = = p - > patternString )
MatchSpecList newMatchSpecList ;
RegexList newRegexList ;
TQRegExp rxWork ; // single working copy == each pattern inherits previous options
MatchSpec matchSpec = {
PatternType : : DEFAULT ,
ANCHandling : : DEFAULT ,
true , // seeking matches, not non-matches
" "
} ;
if ( newMatchSpecString = = p - > m_matchSpecString )
return true ;
return true ;
TSMTRACE < < " TDEStringMatcher::generatePatternList: Proposed pattern string: < " < < newPatternString < < " > " < < endl ;
TSMTRACE < < " TDEStringMatcher::setPatterns: Proposed match specification string: < " < < newMatchSpecString < < " > " < < endl ;
if ( newPatternString . length ( ) < 2 ) {
TSMTRACE < < " Input string too short to be interpreted, patterns will be cleared " < < endl ;
if ( newMatchSpecString . isEmpty ( ) ) {
patternList . clear ( ) ;
TSMTRACE < < " Empty pattern string => match specifications will be cleared " < < endl ;
p - > patternString = " " ;
p - > m_matchSpecList . clear ( ) ;
# ifdef TSMSIGNALS
p - > m_regexList . clear ( ) ;
p - > m_matchSpecString = " " ;
emit patternsChanged ( ) ;
emit patternsChanged ( ) ;
# endif // TSMSIGNALS
return true ;
return true ;
}
}
TQChar patternStringDivider = newPatternString [ 0 ] ;
TSMTRACE < < " patternStringDivider = ' " < < patternStringDivider < < " ' " < < endl ;
TQStringList specList = TQStringList : : split ( patternStringDivider , newPatternString . mid ( 1 ) , true ) ;
TQRegExp rxWork ;
TQStringList newMatchSpecs = TQStringList : : split ( SEP , newMatchSpecString , true ) ;
TQPtrList < TQRegExp > rxPatternList ;
if ( newMatchSpecs . count ( ) % 2 ! = 0 ) {
for ( const TQString & specification : specList ) {
TSMTRACE < < " Error: match specification string must contain an even number of components " < < endl ;
TSMTRACE < < " Processing specification string: ' " < < specification < < " ' " < < endl ;
return false ;
TQChar specificationType = specification [ 0 ] . lower ( ) ;
switch ( specificationType ) {
case ' o ' : {
TQString optionString = specification . mid ( 1 ) . lower ( ) ;
TSMTRACE < < " Processing match option string: ' " < < optionString < < " ' " < < endl ;
for ( int i = 0 ; i < optionString . length ( ) ; i + + ) {
TQChar optionChar = optionString [ i ] ;
TSMTRACE < < " Option character: ' " < < optionChar < < " ' " < < endl ;
switch ( optionChar ) {
case ' w ' : rxWork . setWildcard ( true ) ; break ;
case ' r ' : rxWork . setWildcard ( false ) ; break ;
case ' c ' : rxWork . setCaseSensitive ( true ) ; break ;
case ' i ' : rxWork . setCaseSensitive ( false ) ; break ;
case ' m ' : rxWork . setMinimal ( true ) ; break ;
case ' g ' : rxWork . setMinimal ( false ) ; break ;
default : break ;
}
}
TSMTRACE < < newMatchSpecs . count ( ) < < endl ;
bool processingPattern = false ; // expected format: option string , pattern string, ...
for ( TQString & specification : newMatchSpecs ) {
if ( specification . find ( TQChar ( SEP ) ) > = 0 ) {
TSMTRACE < < " Error: match specification string contains reserved separator character " < < endl ;
newMatchSpecList . clear ( ) ;
newRegexList . clear ( ) ;
return false ;
}
}
TSMTRACE < < " Wildcard/CaseSensitive settings: " < < rxWork . wildcard ( ) < < " / " < < rxWork . caseSensitive ( ) < < endl ;
if ( processingPattern ) {
TSMTRACE < < " Processing match pattern string: ' " < < specification < < " ' " < < endl ;
if ( specification . isEmpty ( ) ) {
TSMTRACE < < " Error: empty patterns are not allowed " < < endl ;
newMatchSpecList . clear ( ) ;
newRegexList . clear ( ) ;
return false ;
}
}
// Prepare regex
switch ( matchSpec . patternType ) {
// The following pattern types will be using TQRegExp functions for matching
case PatternType : : REGEX :
rxWork . setPattern ( specification ) ;
break ;
case PatternType : : WILDCARD :
rxWork . setPattern ( wildcardToRegex ( specification ) ) ;
break ;
break ;
case ' p ' : {
// The following pattern types will be using TQString functions for matching
TQString pattern = specification . mid ( 1 ) ;
case PatternType : : SUBSTRING :
TSMTRACE < < " Processing match pattern: ' " < < pattern < < " ' " < < endl ;
rxWork . setPattern ( specification ) ; // used for storage only
if ( pattern . isEmpty ( ) ) {
break ;
TSMTRACE < < " Empty patterns are not allowed " < < endl ;
rxPatternList . clear ( ) ;
default :
return false ;
continue ; // should not arise
}
switch ( matchSpec . ancHandling ) {
case ANCHandling : : CASE_SENSITIVE :
rxWork . setCaseSensitive ( true ) ;
break ;
case ANCHandling : : CASE_INSENSITIVE :
rxWork . setCaseSensitive ( false ) ;
break ;
case ANCHandling : : EQUIVALENCE :
rxWork . setCaseSensitive ( false ) ;
// FIXME TBD: This is where we will be converting each (unescaped)
// alphanumeric character in rxWork.pattern to its "least" equivalent.
break ;
default :
continue ; // should not arise
}
}
rxWork . setPattern ( pattern ) ;
// Test regex
if ( ! rxWork . isValid ( ) ) {
if ( ! rxWork . isValid ( ) ) {
TSMTRACE < < " Invalid pattern " < < endl ;
TSMTRACE < < " Error: invalid pattern syntax' " < < endl ;
rxPatternList . clear ( ) ;
newMatchSpecList . clear ( ) ;
newRegexList . clear ( ) ;
return false ;
return false ;
continue ;
}
}
TQRegExp * rxPattern = new TQRegExp ( rxWork ) ;
rxPatternList . append ( rxPattern ) ;
// if (! rxWork.isReallyWhatUserIntended() ) { HA HA
TSMTRACE < < " Final Wildcard/CaseSensitive settings: " < < rxWork . wildcard ( ) < < " / " < < rxWork . caseSensitive ( ) < < endl ;
matchSpec . pattern = specification ;
newMatchSpecList . push_back ( matchSpec ) ;
newRegexList . append ( rxWork ) ;
processingPattern = false ; // next spec should be an option string
continue ;
}
}
break ;
default :
specification = specification . lower ( ) ;
TSMTRACE < < " Ignoring unknown specification type ' " < < specificationType < < " ' " < < endl ;
TSMTRACE < < " Processing match option string: ' " < < specification < < " ' " < < endl ;
//-Relax, don't overreact: rxPatternList.clear();
for ( int i = 0 ; i < specification . length ( ) ; i + + ) {
//-Relax, don't overreact: return false;
TQChar optionChar = specification [ i ] ;
break ;
TSMTRACE < < " Option character: ' " < < optionChar < < " ' " < < endl ;
switch ( optionChar ) {
case ' r ' : matchSpec . patternType = PatternType : : REGEX ; break ;
case ' w ' : matchSpec . patternType = PatternType : : WILDCARD ; break ;
case ' s ' : matchSpec . patternType = PatternType : : SUBSTRING ; break ;
case ' c ' : matchSpec . ancHandling = ANCHandling : : CASE_SENSITIVE ; break ;
case ' i ' : matchSpec . ancHandling = ANCHandling : : CASE_INSENSITIVE ; break ;
case ' e ' : matchSpec . ancHandling = ANCHandling : : EQUIVALENCE ; break ;
case ' = ' : matchSpec . wantMatch = true ; break ;
case ' ! ' : matchSpec . wantMatch = false ; break ;
default :
// We reserve ALL other possible option characters for future use!
TSMTRACE < < " Error: invalid option character " < < endl ;
return false ;
}
}
}
}
// patternList.clear(); // no need to do this?
processingPattern = true ; // next spec should be a pattern string
patternList . setAutoDelete ( true ) ;
}
patternList = rxPatternList ;
p - > patternString = newPatternString ;
p - > m_matchSpecList . clear ( ) ; p - > m_matchSpecList = newMatchSpecList ;
// rxPatternList.clear(); // no need to do this?
p - > m_regexList . clear ( ) ; p - > m_regexList = newRegexList ;
p - > m_matchSpecString = newMatchSpecString ;
TSMTRACE < < " Final patternString: ' " < < p - > patternString < < " ' " < < endl ;
//newRegexList.clear(); // no need to do this?
TSMTRACE < < " Number of regex match patterns in list: ' " < < patternList . count ( ) < < " ' " < < endl ;
# ifdef TSMSIGNALS
TSMTRACE < < " Final patternString: ' " < < p - > m_matchSpecString < < " ' " < < endl ;
TSMTRACE < < " Number of regex match patterns in list: ' " < < p - > m_regexList . count ( ) < < " ' " < < endl ;
TSMTRACE < < " Notifying slots of pattern change " < < endl ;
TSMTRACE < < " Notifying slots of pattern change " < < endl ;
emit patternsChanged ( ) ;
emit patternsChanged ( ) ;
TSMTRACE < < " All slots have been notified " < < endl ;
TSMTRACE < < " All slots have been notified " < < endl ;
# endif // TSMSIGNALS
TSMTRACE < < " TDEStringMatcher::setPatterns: Patterns were successfully regenerated " < < endl < < endl ;
TSMTRACE < < " TDEStringMatcher::generatePatternList: Patterns were successfully regenerated " < < endl < < endl ;
return true ;
return true ;
}
}
//================================================================================================
// Match functions
//================================================================================================
// Returns true as soon as stringToMatch satisfies any one stored match
// specification; returns false when none does or when no specifications are
// stored. A specification is satisfied when "pattern was found" equals its
// wantMatch flag. REGEX and WILDCARD specifications are tested with the
// index-aligned TQRegExp, SUBSTRING specifications with TQString::find().
//
// NOTE(review): this span interleaves two revisions of the function (the
// older loop over patternList and the newer loop over p->m_matchSpecList), so
// statements and braces do not pair up as written; reconcile before relying
// on it.
bool TDEStringMatcher : : matchAny ( const TQString & stringToMatch )
bool TDEStringMatcher : : matchAny ( const TQString & stringToMatch )
{
{
//-Debug: TSMTRACE << "Attempting to match string '" << stringToMatch << "' against stored patterns" << endl;
TSMTRACE < < " Attempting to match string ' " < < stringToMatch < < " ' against stored patterns " < < endl ;
for ( const TQRegExp * rxPattern : patternList ) {
if ( p - > m_matchSpecList . isEmpty ( ) ) {
if (
//-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
( rxPattern - > wildcard ( ) & & rxPattern - > exactMatch ( stringToMatch ) ) | |
return false ; //FIXME: or should that be true per MicheleC's comment?
( ! rxPattern - > wildcard ( ) & & rxPattern - > search ( stringToMatch ) > = 0 )
}
)
TQString equivalentString ;
for ( size_t index = 0 ; index < p - > m_matchSpecList . count ( ) ; index + + )
{
TQString matchThis = stringToMatch ;
if ( p - > m_matchSpecList [ index ] . ancHandling = = ANCHandling : : EQUIVALENCE )
{
{
//-Debug: TSMTRACE << "String matched pattern: '" << rxPattern->pattern() << "'" << endl;
if ( equivalentString . isNull ( ) ) {
// FIXME TBD: This is where we will be converting each alphanumeric
// character in stringToMatch to its "least" equivalent and storing
// the result in equivalentString. Until then, we'll just do:
equivalentString = stringToMatch ;
}
matchThis = equivalentString ;
}
switch ( p - > m_matchSpecList [ index ] . patternType ) {
case PatternType : : REGEX :
case PatternType : : WILDCARD :
if (
( p - > m_regexList [ index ] . search ( matchThis ) > = 0 ) // was there a match?
= = p - > m_matchSpecList [ index ] . wantMatch // is that what we were looking for?
) {
TSMTRACE < < " Match succeeded with regex pattern: ' " < < p - > m_regexList [ index ] . pattern ( ) < < " ' " < < endl ;
return true ;
return true ;
}
}
break ;
case PatternType : : SUBSTRING :
// NOTE(review): relies on the numeric value of ancHandling being zero only
// for the case-sensitive setting -- confirm against the enum declaration.
bool cs = ! ( bool ) p - > m_matchSpecList [ index ] . ancHandling ;
if (
( matchThis . find ( p - > m_matchSpecList [ index ] . pattern , 0 , cs ) > = 0 ) // was there a match?
= = p - > m_matchSpecList [ index ] . wantMatch // is that what we were looking for?
) {
TSMTRACE < < " Match succeeded with substring: ' " < < p - > m_matchSpecList [ index ] . pattern < < " ' " < < endl ;
return true ;
}
break ;
}
}
if ( patternList . isEmpty ( ) ) {
//-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
return false ;
}
}
else {
//-Debug: TSMTRACE << "Match failed, no pattern matched!" << endl;
//-Debug: TSMTRACE << "Match failed, no pattern matched!" << endl;
return false ;
return false ;
}
}
}
// Tests stringToMatch against every stored match specification using the
// index-aligned TQRegExp objects; returns false on the first specification
// that fails and when no specifications are stored, true otherwise.
//
// NOTE(review): this span interleaves two revisions of the function (the
// older loop over patternList and the newer loop over p->m_matchSpecList), so
// statements and braces do not pair up as written; reconcile before relying
// on it.
// NOTE(review): the test `( search( matchThis ) < 0 ) != wantMatch` evaluates
// to true when a match IS found and wantMatch is true, which takes the
// `return false` branch -- this looks inverted relative to its inline
// comments; verify the intended logic.
// NOTE(review): unlike matchAny(), no distinction is made here for
// PatternType::SUBSTRING; confirm whether that is intended.
bool TDEStringMatcher : : matchAll ( const TQString & stringToMatch )
bool TDEStringMatcher : : matchAll ( const TQString & stringToMatch )
{
{
//-Debug: TSMTRACE << "Attempting to match string '" << stringToMatch << "' against ALL stored patterns" << endl;
//-Debug: TSMTRACE << "Attempting to match string '" << stringToMatch << "' against stored patterns" << endl;
for ( const TQRegExp * rxPattern : patternList ) {
if ( p - > m_matchSpecList . isEmpty ( ) ) {
if ( !
//-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
( rxPattern - > wildcard ( ) & & rxPattern - > exactMatch ( stringToMatch ) ) | |
return false ; //FIXME: or should that be true per MicheleC's comment?
( ! rxPattern - > wildcard ( ) & & rxPattern - > search ( stringToMatch ) > = 0 )
}
)
TQString equivalentString ;
for ( size_t index = 0 ; index < p - > m_matchSpecList . count ( ) ; index + + )
{
{
//-Debug: TSMTRACE << "String failed to match pattern: '" << rxPattern->pattern() << "'" << endl;
TQString matchThis = stringToMatch ;
return false ;
if ( p - > m_matchSpecList [ index ] . ancHandling = = ANCHandling : : EQUIVALENCE )
{
if ( equivalentString . isNull ( ) ) {
// FIXME TBD: This is where we will be converting each alphanumeric
// character in stringToMatch to its "least" equivalent and storing
// the result in equivalentString. Until then, we'll just do:
equivalentString = stringToMatch ;
}
}
matchThis = equivalentString ;
}
}
if ( patternList . isEmpty ( ) ) {
if (
//-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
( p - > m_regexList [ index ] . search ( matchThis ) < 0 ) // was there no match?
! = p - > m_matchSpecList [ index ] . wantMatch // is that what we were looking for?
) {
//-Debug: TSMTRACE << "String fail3ed to matching pattern: '" << rxPattern->pattern() << "'" << endl;
return false ;
return false ;
}
}
else {
if ( p - > m_regexList [ index ] . search ( matchThis ) < 0 ) {
//-Debug: TSMTRACE << "String failed to match pattern: '" << rxPattern->pattern() << "'" << endl;
return false ;
}
}
//-Debug: TSMTRACE << "Match succeeded, all patterns matched!" << endl;
//-Debug: TSMTRACE << "Match succeeded, all patterns matched!" << endl;
return true ;
return true ;
}
//================================================================================================
// Utility functions
//================================================================================================
/*
The following code is a modified copy of that found in tqt3 / src / tools / qregexp . cpp .
*/
// Converts a shell-style wildcard pattern into an equivalent regular
// expression string.
//
//   '*'                 becomes ".*"
//   '?'                 becomes "."
//   regex metacharacters ($ ( ) + . \ ^ { | }) are backslash-escaped
//   '[' ... ']'         is copied as a character class; a leading '!' is
//                       translated to the regex negation '^', a leading '^'
//                       is escaped so that it stays literal, a ']' directly
//                       after that is kept as a literal class member, and
//                       backslashes inside the class are escaped
//
// The result is anchored with '^' and '$' because a wildcard pattern must
// match the entire string.
//
// TBD: Add support for extglob
TQString TDEStringMatcher::wildcardToRegex( const TQString& wildcardPattern )
{
  const int wclen = wildcardPattern.length();
  const TQChar *wc = wildcardPattern.unicode();
  TQString rx = TQString::fromLatin1( "" );
  int i = 0;

  while ( i < wclen ) {
    const TQChar c = wc[i++];
    switch ( c.unicode() ) {
      case '*':
        rx += TQString::fromLatin1( ".*" );
        break;
      case '?':
        rx += TQChar( '.' );
        break;
      case '$':
      case '(':
      case ')':
      case '+':
      case '.':
      case '\\':
      case '^':
      case '{':
      case '|':
      case '}':
        rx += TQChar( '\\' );
        rx += c;
        break;
      case '[':
        rx += c;
        // POSIX states that the negation character is '!', not '^'.
        // Every look-ahead is bounds-checked: '[' may be the last character.
        if ( i < wclen && wc[i] == TQChar( '!' ) ) {
          rx += TQChar( '^' );
          i++;
        } else if ( i < wclen && wc[i] == TQChar( '^' ) ) {
          rx += TQChar( '\\' );
          rx += wc[i++];
        }
        // A ']' at the start of the class is a literal member. The check must
        // look at the input (wc), not at the output built so far (rx).
        if ( i < wclen && wc[i] == TQChar( ']' ) )
          rx += wc[i++];
        // Copy the class body; the closing ']' is left for the next iteration
        // of the outer loop, which appends it unchanged.
        while ( i < wclen && wc[i] != TQChar( ']' ) ) {
          if ( wc[i] == TQChar( '\\' ) )
            rx += TQChar( '\\' );
          rx += wc[i++];
        }
        break;
      default:
        rx += c;
    }
  }

  /* Wildcard patterns must match entire string */
  return TQChar( '^' ) + rx + TQChar( '$' );
}
}
// Returns a copy of basicString in which every regular expression
// metacharacter ( + . ^ ( ) [ ] { } | $ ? * \ ) is preceded by a backslash;
// all other characters are copied unchanged.
static TQString escapeRegexChars( const TQString& basicString )
{
  // Characters that must be escaped to be taken literally
  const TQString metaChars = TQString::fromLatin1( "+.^()[]{}|$?*\\" );

  TQString escaped = TQString::fromLatin1( "" );
  const int inputLength = basicString.length();
  for ( int pos = 0; pos < inputLength; ++pos ) {
    const TQChar current = basicString[pos];
    if ( metaChars.find( current ) >= 0 )
      escaped += TQChar( '\\' );
    escaped += current;
  }
  return escaped;
}
//================================================================================================
# include "tdestringmatcher.moc"
# include "tdestringmatcher.moc"