1 package net.sourceforge.phpdt.internal.ui.util;
3 import java.util.Vector;
6 * A string pattern matcher, supporting * and ? wildcards.
8 public class StringMatcher {
9 protected String fPattern;
11 protected int fLength; // pattern length
13 protected boolean fIgnoreWildCards;
15 protected boolean fIgnoreCase;
17 protected boolean fHasLeadingStar;
19 protected boolean fHasTrailingStar;
21 protected String fSegments[]; // the given pattern is split into *
24 /* boundary value beyond which we don't need to search in the text */
25 protected int fBound = 0;
27 protected static final char fSingleWildCard = '\u0000';
29 public static class Position {
30 int start; // inclusive
34 public Position(int start, int end) {
39 public int getStart() {
49 * StringMatcher constructor takes in a String object that is a simple
50 * pattern which may contain '*' for 0 and many characters and '?' for
51 * exactly one character.
53 * Literal '*' and '?' characters must be escaped in the pattern e.g., "\*"
54 * means literal "*", etc.
56 * Escaping any other character (including the escape character itself),
57 * just results in that character in the pattern. e.g., "\a" means "a" and
60 * If invoking the StringMatcher with string literals in Java, don't forget
61 * escape characters are represented by "\\".
64 * the pattern to match text against
66 * if true, case is ignored
67 * @param ignoreWildCards
68 * if true, wild cards and their escape sequences are ignored
69 * (everything is taken literally).
71 public StringMatcher(String pattern, boolean ignoreCase,
72 boolean ignoreWildCards) {
74 throw new IllegalArgumentException();
75 fIgnoreCase = ignoreCase;
76 fIgnoreWildCards = ignoreWildCards;
78 fLength = pattern.length();
80 if (fIgnoreWildCards) {
88 * Find the first occurrence of the pattern between
89 * <code>start</code> (inclusive)
90 * and <code>end</code>(exclusive).
91 * @param <code>text</code>, the String object to search in
92 * @param <code>start</code>, the starting index of the search range, inclusive
93 * @param <code>end</code>, the ending index of the search range, exclusive
94 * @return an <code>StringMatcher.Position</code> object that keeps the starting
95 * (inclusive) and ending positions (exclusive) of the first occurrence of the
96 * pattern in the specified range of the text; return null if not found or subtext
97 * is empty (start==end). A pair of zeros is returned if pattern is empty string
98 * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
99 * is returned. For a pattern like "*??*" in text "abcdf", (1,3) is returned
101 public StringMatcher.Position find(String text, int start, int end) {
103 throw new IllegalArgumentException();
105 int tlen = text.length();
110 if (end < 0 || start >= end)
113 return new Position(start, start);
114 if (fIgnoreWildCards) {
115 int x = posIn(text, start, end);
118 return new Position(x, x + fLength);
121 int segCount = fSegments.length;
122 if (segCount == 0)// pattern contains only '*'(s)
123 return new Position(start, end);
128 for (i = 0; i < segCount && curPos < end; ++i) {
129 String current = fSegments[i];
130 int nextMatch = regExpPosIn(text, curPos, end, current);
134 matchStart = nextMatch;
135 curPos = nextMatch + current.length();
139 return new Position(matchStart, curPos);
143 * match the given <code>text</code> with the pattern
145 * @return true if matched, otherwise false
146 * @param <code>text</code>, a String object
148 public boolean match(String text) {
149 return match(text, 0, text.length());
153 * Given the starting (inclusive) and the ending (exclusive) positions in
154 * the <code>text</code>, determine if the given substring matches with
157 * @return true if the specified portion of the text matches the pattern
159 * <code>text</code>, a String object that contains the
162 * <code>start</code> marks the starting position (inclusive) of the substring
163 * @param int <code>end</code> marks the ending index (exclusive) of the substring
165 public boolean match(String text, int start, int end) {
167 throw new IllegalArgumentException();
172 if (fIgnoreWildCards)
173 return (end - start == fLength)
174 && fPattern.regionMatches(fIgnoreCase, 0, text, start,
176 int segCount = fSegments.length;
177 if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) // pattern
187 int tlen = text.length();
194 int bound = end - fBound;
198 String current = fSegments[i];
199 int segLength = current.length();
201 /* process first segment */
202 if (!fHasLeadingStar) {
203 if (!regExpRegionMatches(text, start, current, 0, segLength)) {
207 tCurPos = tCurPos + segLength;
211 /* process middle segments */
212 while (i < segCount) {
213 current = fSegments[i];
215 int k = current.indexOf(fSingleWildCard);
217 currentMatch = textPosIn(text, tCurPos, end, current);
218 if (currentMatch < 0)
221 currentMatch = regExpPosIn(text, tCurPos, end, current);
222 if (currentMatch < 0)
225 tCurPos = currentMatch + current.length();
229 /* process final segment */
230 if (!fHasTrailingStar && tCurPos != end) {
231 int clen = current.length();
232 return regExpRegionMatches(text, end - clen, current, 0, clen);
234 return i == segCount;
238 * This method parses the given pattern into segments separated by wildcard
239 * '*' characters. Since wildcards are not being used in this case, the
240 * pattern consists of a single segment.
242 private void parseNoWildCards() {
243 fSegments = new String[1];
244 fSegments[0] = fPattern;
249 * Parses the given pattern into segments separated by wildcard '*'
253 * a String object that is a simple regular expression with '*'
256 private void parseWildCards() {
257 if (fPattern.startsWith("*"))//$NON-NLS-1$
258 fHasLeadingStar = true;
259 if (fPattern.endsWith("*")) {//$NON-NLS-1$
260 /* make sure it's not an escaped wildcard */
261 if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
262 fHasTrailingStar = true;
266 Vector temp = new Vector();
269 StringBuffer buf = new StringBuffer();
270 while (pos < fLength) {
271 char c = fPattern.charAt(pos++);
274 if (pos >= fLength) {
277 char next = fPattern.charAt(pos++);
278 /* if it's an escape sequence */
279 if (next == '*' || next == '?' || next == '\\') {
282 /* not an escape sequence, just insert literally */
289 if (buf.length() > 0) {
291 temp.addElement(buf.toString());
292 fBound += buf.length();
297 /* append special character representing single match wildcard */
298 buf.append(fSingleWildCard);
305 /* add last buffer to segment list */
306 if (buf.length() > 0) {
307 temp.addElement(buf.toString());
308 fBound += buf.length();
311 fSegments = new String[temp.size()];
312 temp.copyInto(fSegments);
316 * @param <code>text</code>, a string which contains no wildcard
317 * @param <code>start</code>, the starting index in the text for search,
319 * @param <code>end</code>, the stopping point of search, exclusive
320 * @return the starting index in the text of the pattern , or -1 if not
323 protected int posIn(String text, int start, int end) {// no wild card in
325 int max = end - fLength;
328 int i = text.indexOf(fPattern, start);
329 if (i == -1 || i > max)
334 for (int i = start; i <= max; ++i) {
335 if (text.regionMatches(true, i, fPattern, 0, fLength))
343 * @param <code>text</code>, a simple regular expression that may only
345 * @param <code>start</code>, the starting index in the text for search,
347 * @param <code>end</code>, the stopping point of search, exclusive
348 * @param <code>p</code>, a simple regular expression that may contain '?'
349 * @param <code>caseIgnored</code>, whether the pattern is not case sensitive
350 * @return the starting index in the text of the pattern , or -1 if not
353 protected int regExpPosIn(String text, int start, int end, String p) {
354 int plen = p.length();
356 int max = end - plen;
357 for (int i = start; i <= max; ++i) {
358 if (regExpRegionMatches(text, i, p, 0, plen))
367 * @param <code>text</code>, a String to match
368 * @param <code>start</code>, int that indicates the starting index of
370 * @param <code>end</code> int that indicates the ending index of match,
372 * @param <code>p</code>, String, a simple regular expression that
374 * @param <code>ignoreCase</code>, boolean indicating whether <code>p</code>
377 protected boolean regExpRegionMatches(String text, int tStart, String p,
378 int pStart, int plen) {
380 char tchar = text.charAt(tStart++);
381 char pchar = p.charAt(pStart++);
383 /* process wild cards */
384 if (!fIgnoreWildCards) {
385 /* skip single wild cards */
386 if (pchar == fSingleWildCard) {
393 if (Character.toUpperCase(tchar) == Character
396 // comparing after converting to upper case doesn't handle all
398 // also compare after converting to lower case
399 if (Character.toLowerCase(tchar) == Character
409 * @param <code>text</code>, the string to match
410 * @param <code>start</code>, the starting index in the text for search,
412 * @param <code>end</code>, the stopping point of search, exclusive
414 * </code>, a string that has no wildcard
416 * ignoreCase</code>, boolean indicating whether <code>p</code>
418 * @return the starting index in the text of the pattern , or -1 if not
421 protected int textPosIn(String text, int start, int end, String p) {
423 int plen = p.length();
424 int max = end - plen;
427 int i = text.indexOf(p, start);
428 if (i == -1 || i > max)
433 for (int i = start; i <= max; ++i) {
434 if (text.regionMatches(true, i, p, 0, plen))