net.sourceforge.phpeclipse.externaltools/src/net/sourceforge/phpdt/externaltools/model/StringMatcher.java

   1 package net.sourceforge.phpdt.externaltools.model;
   2
   3 /**********************************************************************
   4  Copyright (c) 2000, 2002 IBM Corp.  All rights reserved.
   5  This file is made available under the terms of the Common Public License v1.0
   6  which accompanies this distribution, and is available at
   7  http://www.eclipse.org/legal/cpl-v10.html
   8  **********************************************************************/
   9
  10 import java.util.Vector;
  11
  12 /**
  13  * Copied from net.sourceforge.phpdt.internal.ui.util.StringMatcher
  14  *
  15  * A string pattern matcher, suppporting * and ? wildcards.
  16  */
  17 public class StringMatcher {
  18         protected String fPattern;
  19
  20         protected int fLength; // pattern length
  21
  22         protected boolean fIgnoreWildCards;
  23
  24         protected boolean fIgnoreCase;
  25
  26         protected boolean fHasLeadingStar;
  27
  28         protected boolean fHasTrailingStar;
  29
  30         protected String fSegments[]; // the given pattern is split into *
  31                                                                         // separated segments
  32
  33         /* boundary value beyond which we don't need to search in the text */
  34         protected int fBound = 0;
  35
  36         protected static final char fSingleWildCard = '\u0000';
  37
  38         public static class Position {
  39                 int start; // inclusive
  40
  41                 int end; // exclusive
  42
  43                 public Position(int start, int end) {
  44                         this.start = start;
  45                         this.end = end;
  46                 }
  47
  48                 public int getStart() {
  49                         return start;
  50                 }
  51
  52                 public int getEnd() {
  53                         return end;
  54                 }
  55         }
  56
  57         /**
  58          * StringMatcher constructor takes in a String object that is a simple
  59          * pattern which may contain �*� for 0 and many characters and �?� for
  60          * exactly one character.
  61          *
  62          * Literal '*' and '?' characters must be escaped in the pattern e.g., "\*"
  63          * means literal "*", etc.
  64          *
  65          * Escaping any other character (including the escape character itself),
  66          * just results in that character in the pattern. e.g., "\a" means "a" and
  67          * "\\" means "\"
  68          *
  69          * If invoking the StringMatcher with string literals in Java, don't forget
  70          * escape characters are represented by "\\".
  71          *
  72          * @param pattern
  73          *            the pattern to match text against
  74          * @param ignoreCase
  75          *            if true, case is ignored
  76          * @param ignoreWildCards
  77          *            if true, wild cards and their escape sequences are ignored
  78          *            (everything is taken literally).
  79          */
  80 //      public StringMatcher(String pattern, boolean ignoreCase,
  81 //                      boolean ignoreWildCards) {
  82 //              if (pattern == null)
  83 //                      throw new IllegalArgumentException();
  84 //              fIgnoreCase = ignoreCase;
  85 //              fIgnoreWildCards = ignoreWildCards;
  86 //              fPattern = pattern;
  87 //              fLength = pattern.length();
  88 //
  89 //              if (fIgnoreWildCards) {
  90 //                      parseNoWildCards();
  91 //              } else {
  92 //                      parseWildCards();
  93 //              }
  94 //      }
  95
  96         /**
  97          * Find the first occurrence of the pattern between
  98          * <code>start</code)(inclusive)
  99          * and <code>end</code>(exclusive).
 100          * @param <code>text</code>, the String object to search in
 101          * @param <code>start</code>, the starting index of the search range, inclusive
 102          * @param <code>end</code>, the ending index of the search range, exclusive
 103          * @return an <code>StringMatcher.Position</code> object that keeps the starting
 104          * (inclusive) and ending positions (exclusive) of the first occurrence of the
 105          * pattern in the specified range of the text; return null if not found or subtext
 106          * is empty (start==end). A pair of zeros is returned if pattern is empty string
 107          * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
 108          * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
 109          */
 110 //      public StringMatcher.Position find(String text, int start, int end) {
 111 //              if (text == null)
 112 //                      throw new IllegalArgumentException();
 113 //
 114 //              int tlen = text.length();
 115 //              if (start < 0)
 116 //                      start = 0;
 117 //              if (end > tlen)
 118 //                      end = tlen;
 119 //              if (end < 0 || start >= end)
 120 //                      return null;
 121 //              if (fLength == 0)
 122 //                      return new Position(start, start);
 123 //              if (fIgnoreWildCards) {
 124 //                      int x = posIn(text, start, end);
 125 //                      if (x < 0)
 126 //                              return null;
 127 //                      return new Position(x, x + fLength);
 128 //              }
 129 //
 130 //              int segCount = fSegments.length;
 131 //              if (segCount == 0)// pattern contains only '*'(s)
 132 //                      return new Position(start, end);
 133 //
 134 //              int curPos = start;
 135 //              int matchStart = -1;
 136 //              int i;
 137 //              for (i = 0; i < segCount && curPos < end; ++i) {
 138 //                      String current = fSegments[i];
 139 //                      int nextMatch = regExpPosIn(text, curPos, end, current);
 140 //                      if (nextMatch < 0)
 141 //                              return null;
 142 //                      if (i == 0)
 143 //                              matchStart = nextMatch;
 144 //                      curPos = nextMatch + current.length();
 145 //              }
 146 //              if (i < segCount)
 147 //                      return null;
 148 //              return new Position(matchStart, curPos);
 149 //      }
 150
 151         /**
 152          * match the given <code>text</code> with the pattern
 153          *
 154          * @return true if matched eitherwise false
 155          * @param <code>text</code>, a String object
 156          */
 157         public boolean match(String text) {
 158                 return match(text, 0, text.length());
 159         }
 160
 161         /**
 162          * Given the starting (inclusive) and the ending (exclusive) positions in
 163          * the <code>text</code>, determine if the given substring matches with
 164          * aPattern
 165          *
 166          * @return true if the specified portion of the text matches the pattern
 167          * @param String
 168          *            <code>text</code>, a String object that contains the
 169          *            substring to match
 170          * @param int
 171          *            <code>start<code> marks the starting position (inclusive) of the substring
 172          * @param int <code>end<code> marks the ending index (exclusive) of the substring
 173          */
 174         public boolean match(String text, int start, int end) {
 175                 if (null == text)
 176                         throw new IllegalArgumentException();
 177
 178                 if (start > end)
 179                         return false;
 180
 181                 if (fIgnoreWildCards)
 182                         return (end - start == fLength)
 183                                         && fPattern.regionMatches(fIgnoreCase, 0, text, start,
 184                                                         fLength);
 185                 int segCount = fSegments.length;
 186                 if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) // pattern
 187                                                                                                                                         // contains
 188                                                                                                                                         // only
 189                                                                                                                                         // '*'(s)
 190                         return true;
 191                 if (start == end)
 192                         return fLength == 0;
 193                 if (fLength == 0)
 194                         return start == end;
 195
 196                 int tlen = text.length();
 197                 if (start < 0)
 198                         start = 0;
 199                 if (end > tlen)
 200                         end = tlen;
 201
 202                 int tCurPos = start;
 203                 int bound = end - fBound;
 204                 if (bound < 0)
 205                         return false;
 206                 int i = 0;
 207                 String current = fSegments[i];
 208                 int segLength = current.length();
 209
 210                 /* process first segment */
 211                 if (!fHasLeadingStar) {
 212                         if (!regExpRegionMatches(text, start, current, 0, segLength)) {
 213                                 return false;
 214                         } else {
 215                                 ++i;
 216                                 tCurPos = tCurPos + segLength;
 217                         }
 218                 }
 219
 220                 /* process middle segments */
 221                 while (i < segCount) {
 222                         current = fSegments[i];
 223                         int currentMatch;
 224                         int k = current.indexOf(fSingleWildCard);
 225                         if (k < 0) {
 226                                 currentMatch = textPosIn(text, tCurPos, end, current);
 227                                 if (currentMatch < 0)
 228                                         return false;
 229                         } else {
 230                                 currentMatch = regExpPosIn(text, tCurPos, end, current);
 231                                 if (currentMatch < 0)
 232                                         return false;
 233                         }
 234                         tCurPos = currentMatch + current.length();
 235                         i++;
 236                 }
 237
 238                 /* process final segment */
 239                 if (!fHasTrailingStar && tCurPos != end) {
 240                         int clen = current.length();
 241                         return regExpRegionMatches(text, end - clen, current, 0, clen);
 242                 }
 243                 return i == segCount;
 244         }
 245
 246         /**
 247          * This method parses the given pattern into segments seperated by wildcard
 248          * '*' characters. Since wildcards are not being used in this case, the
 249          * pattern consists of a single segment.
 250          */
 251         private void parseNoWildCards() {
 252                 fSegments = new String[1];
 253                 fSegments[0] = fPattern;
 254                 fBound = fLength;
 255         }
 256
 257         /**
 258          * Parses the given pattern into segments seperated by wildcard '*'
 259          * characters.
 260          *
 261          * @param p,
 262          *            a String object that is a simple regular expression with �*�
 263          *            and/or �?�
 264          */
 265         private void parseWildCards() {
 266                 if (fPattern.startsWith("*"))//$NON-NLS-1$
 267                         fHasLeadingStar = true;
 268                 if (fPattern.endsWith("*")) {//$NON-NLS-1$
 269                         /* make sure it's not an escaped wildcard */
 270                         if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
 271                                 fHasTrailingStar = true;
 272                         }
 273                 }
 274
 275                 Vector temp = new Vector();
 276
 277                 int pos = 0;
 278                 StringBuffer buf = new StringBuffer();
 279                 while (pos < fLength) {
 280                         char c = fPattern.charAt(pos++);
 281                         switch (c) {
 282                         case '\\':
 283                                 if (pos >= fLength) {
 284                                         buf.append(c);
 285                                 } else {
 286                                         char next = fPattern.charAt(pos++);
 287                                         /* if it's an escape sequence */
 288                                         if (next == '*' || next == '?' || next == '\\') {
 289                                                 buf.append(next);
 290                                         } else {
 291                                                 /* not an escape sequence, just insert literally */
 292                                                 buf.append(c);
 293                                                 buf.append(next);
 294                                         }
 295                                 }
 296                                 break;
 297                         case '*':
 298                                 if (buf.length() > 0) {
 299                                         /* new segment */
 300                                         temp.addElement(buf.toString());
 301                                         fBound += buf.length();
 302                                         buf.setLength(0);
 303                                 }
 304                                 break;
 305                         case '?':
 306                                 /* append special character representing single match wildcard */
 307                                 buf.append(fSingleWildCard);
 308                                 break;
 309                         default:
 310                                 buf.append(c);
 311                         }
 312                 }
 313
 314                 /* add last buffer to segment list */
 315                 if (buf.length() > 0) {
 316                         temp.addElement(buf.toString());
 317                         fBound += buf.length();
 318                 }
 319
 320                 fSegments = new String[temp.size()];
 321                 temp.copyInto(fSegments);
 322         }
 323
 324         /**
 325          * @param <code>text</code>, a string which contains no wildcard
 326          * @param <code>start</code>, the starting index in the text for search,
 327          *            inclusive
 328          * @param <code>end</code>, the stopping point of search, exclusive
 329          * @return the starting index in the text of the pattern , or -1 if not
 330          *         found
 331          */
 332 //      protected int posIn(String text, int start, int end) {// no wild card in
 333 //                                                                                                                      // pattern
 334 //              int max = end - fLength;
 335 //
 336 //              if (!fIgnoreCase) {
 337 //                      int i = text.indexOf(fPattern, start);
 338 //                      if (i == -1 || i > max)
 339 //                              return -1;
 340 //                      return i;
 341 //              }
 342 //
 343 //              for (int i = start; i <= max; ++i) {
 344 //                      if (text.regionMatches(true, i, fPattern, 0, fLength))
 345 //                              return i;
 346 //              }
 347 //
 348 //              return -1;
 349 //      }
 350
 351         /**
 352          * @param <code>text</code>, a simple regular expression that may only
 353          *            contain '?'(s)
 354          * @param <code>start</code>, the starting index in the text for search,
 355          *            inclusive
 356          * @param <code>end</code>, the stopping point of search, exclusive
 357          * @param <code>p</code>, a simple regular expression that may contains '?'
 358          * @param <code>caseIgnored</code>, wether the pattern is not casesensitive
 359          * @return the starting index in the text of the pattern , or -1 if not
 360          *         found
 361          */
 362         protected int regExpPosIn(String text, int start, int end, String p) {
 363                 int plen = p.length();
 364
 365                 int max = end - plen;
 366                 for (int i = start; i <= max; ++i) {
 367                         if (regExpRegionMatches(text, i, p, 0, plen))
 368                                 return i;
 369                 }
 370                 return -1;
 371         }
 372
 373         /**
 374          *
 375          * @return boolean
 376          * @param <code>text</code>, a String to match
 377          * @param <code>start</code>, int that indicates the starting index of
 378          *            match, inclusive
 379          * @param <code>end</code> int that indicates the ending index of match,
 380          *            exclusive
 381          * @param <code>p</code>, String, String, a simple regular expression that
 382          *            may contain '?'
 383          * @param <code>ignoreCase</code>, boolean indicating wether code>p</code>
 384          *            is case sensitive
 385          */
 386         protected boolean regExpRegionMatches(String text, int tStart, String p,
 387                         int pStart, int plen) {
 388                 while (plen-- > 0) {
 389                         char tchar = text.charAt(tStart++);
 390                         char pchar = p.charAt(pStart++);
 391
 392                         /* process wild cards */
 393                         if (!fIgnoreWildCards) {
 394                                 /* skip single wild cards */
 395                                 if (pchar == fSingleWildCard) {
 396                                         continue;
 397                                 }
 398                         }
 399                         if (pchar == tchar)
 400                                 continue;
 401                         if (fIgnoreCase) {
 402                                 if (Character.toUpperCase(tchar) == Character
 403                                                 .toUpperCase(pchar))
 404                                         continue;
 405                                 // comparing after converting to upper case doesn't handle all
 406                                 // cases;
 407                                 // also compare after converting to lower case
 408                                 if (Character.toLowerCase(tchar) == Character
 409                                                 .toLowerCase(pchar))
 410                                         continue;
 411                         }
 412                         return false;
 413                 }
 414                 return true;
 415         }
 416
 417         /**
 418          * @param <code>text</code>, the string to match
 419          * @param <code>start</code>, the starting index in the text for search,
 420          *            inclusive
 421          * @param <code>end</code>, the stopping point of search, exclusive
 422          * @param code>p
 423          *            </code>, a string that has no wildcard
 424          * @param <code>
 425          *            ignoreCase</code>, boolean indicating wether code>p</code>
 426          *            is case sensitive
 427          * @return the starting index in the text of the pattern , or -1 if not
 428          *         found
 429          */
 430         protected int textPosIn(String text, int start, int end, String p) {
 431
 432                 int plen = p.length();
 433                 int max = end - plen;
 434
 435                 if (!fIgnoreCase) {
 436                         int i = text.indexOf(p, start);
 437                         if (i == -1 || i > max)
 438                                 return -1;
 439                         return i;
 440                 }
 441
 442                 for (int i = start; i <= max; ++i) {
 443                         if (text.regionMatches(true, i, p, 0, plen))
 444                                 return i;
 445                 }
 446
 447                 return -1;
 448         }
 449 }