Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages

wildmat.c

Go to the documentation of this file.
00001 /*  $Revision: 1.2 $
00002 **
00003 **  Do shell-style pattern matching for ?, \, [], and * characters.
00004 **  Might not be robust in face of malformed patterns; e.g., "foo[a-"
00005 **  could cause a segmentation violation.  It is 8bit clean.
00006 **
00007 **  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
00008 **  Rich $alz is now <rsalz@osf.org>.
00009 **  April, 1991:  Replaced mutually-recursive calls with in-line code
00010 **  for the star character.
00011 **
00012 **  Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
00013 **  This can greatly speed up failing wildcard patterns.  For example:
00014 **      pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
00015 **      text 1:  -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
00016 **      text 2:  -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
00017 **  Text 1 matches with 51 calls, while text 2 fails with 54 calls.  Without
00018 **  the ABORT code, it takes 22310 calls to fail.  Ugh.  The following
00019 **  explanation is from Lars:
00020 **  The precondition that must be fulfilled is that DoMatch will consume
00021 **  at least one character in text.  This is true if *p is neither '*' nor
00022 **  '\0'.)  The last return has ABORT instead of FALSE to avoid quadratic
00023 **  behaviour in cases like pattern "*a*b*c*d" with text "abcxxxxx".  With
00024 **  FALSE, each star-loop has to run to the end of the text; with ABORT
00025 **  only the last one does.
00026 **
00027 **  Once the control of one instance of DoMatch enters the star-loop, that
00028 **  instance will return either TRUE or ABORT, and any calling instance
00029 **  will therefore return immediately after (without calling recursively
00030 **  again).  In effect, only one star-loop is ever active.  It would be
00031 **  possible to modify the code to maintain this context explicitly,
00032 **  eliminating all recursive calls at the cost of some complication and
00033 **  loss of clarity (and the ABORT stuff seems to be unclear enough by
00034 **  itself).  I think it would be unwise to try to get this into a
00035 **  released version unless you have a good test data base to try it out
00036 **  on.
00037 */
00038 #include <stdio.h>
00039 #include <sys/types.h>
00040 //#include "configdata.h"
00041 //#include "clibrary.h"
00042 #include "wildmat.h"
00043 
00044 
00045 #define TRUE                    1
00046 #define FALSE                   0
00047 #define ABORT                   -1
00048 
00049 
00050     /* What character marks an inverted character class? */
00051 #define NEGATE_CLASS            '^'
00052     /* Is "*" a common pattern? */
00053 #define OPTIMIZE_JUST_STAR
00054     /* Do tar(1) matching rules, which ignore a trailing slash? */
00055 #undef MATCH_TAR_PATTERN
00056 
00057 
00058 /*
00059 **  Match text and p, return TRUE, FALSE, or ABORT.
00060 */
00061 static int DoMatch(text, p)
00062 register char *text;
00063 register char *p;
00064 {
00065         register int last;
00066         register int matched;
00067         register int reverse;
00068 
00069         for (; *p; text++, p++) {
00070                 if (*text == '\0' && *p != '*')
00071                         return ABORT;
00072                 switch (*p) {
00073                 case '\\':
00074                         /* Literal match with following character. */
00075                         p++;
00076                         /* FALLTHROUGH */
00077                 default:
00078                         if (*text != *p)
00079                                 return FALSE;
00080                         continue;
00081                 case '?':
00082                         /* Match anything. */
00083                         continue;
00084                 case '*':
00085                         while (*++p == '*')
00086                                 /* Consecutive stars act just like one. */
00087                                 continue;
00088                         if (*p == '\0')
00089                                 /* Trailing star matches everything. */
00090                                 return TRUE;
00091                         while (*text)
00092                                 if ((matched =
00093                                      DoMatch(text++, p)) != FALSE)
00094                                         return matched;
00095                         return ABORT;
00096                 case '[':
00097                         reverse = p[1] == NEGATE_CLASS ? TRUE : FALSE;
00098                         if (reverse)
00099                                 /* Inverted character class. */
00100                                 p++;
00101                         matched = FALSE;
00102                         if (p[1] == ']' || p[1] == '-')
00103                                 if (*++p == *text)
00104                                         matched = TRUE;
00105                         for (last = *p; *++p && *p != ']'; last = *p)
00106                                 /* This next line requires a good C compiler. */
00107                                 if (*p == '-' && p[1] != ']'
00108                                     ? *text <= *++p
00109                                     && *text >= last : *text == *p)
00110                                         matched = TRUE;
00111                         if (matched == reverse)
00112                                 return FALSE;
00113                         continue;
00114                 }
00115         }
00116 
00117 #ifdef  MATCH_TAR_PATTERN
00118         if (*text == '/')
00119                 return TRUE;
00120 #endif                          /* MATCH_TAR_ATTERN */
00121         return *text == '\0';
00122 }
00123 
00124 
00125 /*
00126 **  User-level routine.  Returns TRUE or FALSE.
00127 */
00128 int wildmat(const char *text, const char *p)
00129 {
00130 #ifdef  OPTIMIZE_JUST_STAR
00131         if (p[0] == '*' && p[1] == '\0')
00132                 return TRUE;
00133 #endif                          /* OPTIMIZE_JUST_STAR */
00134         return DoMatch(text, p) == TRUE;
00135 }
00136 
00137 
00138 
00139 #if     defined(TEST)
00140 
00141 /* Yes, we use gets not fgets.  Sue me. */
00142 extern char *gets();
00143 
00144 
00145 int main()
00146 {
00147         char p[80];
00148         char text[80];
00149 
00150         printf("Wildmat tester.  Enter pattern, then strings to test.\n");
00151         printf
00152             ("A blank line gets prompts for a new pattern; a blank pattern\n");
00153         printf("exits the program.\n");
00154 
00155         for (;;) {
00156                 printf("\nEnter pattern:  ");
00157                 (void) fflush(stdout);
00158                 if (gets(p) == NULL || p[0] == '\0')
00159                         break;
00160                 for (;;) {
00161                         printf("Enter text:  ");
00162                         (void) fflush(stdout);
00163                         if (gets(text) == NULL)
00164                                 exit(0);
00165                         if (text[0] == '\0')
00166                                 /* Blank line; go back and get a new pattern. */
00167                                 break;
00168                         printf("      %s\n",
00169                                wildmat(text, p) ? "YES" : "NO");
00170                 }
00171         }
00172 
00173         exit(0);
00174         /* NOTREACHED */
00175 }
00176 #endif                          /* defined(TEST) */

Generated on Fri Aug 20 10:58:08 2004 for NewsCache by doxygen 1.3.6-20040222