dc778c3e58f5f12c88e7c50f4b0f960aa2ef7586
[phpeclipse.git] / net.sourceforge.phpeclipse.ui / src / net / sourceforge / phpdt / internal / ui / util / StringMatcher.java
1 package net.sourceforge.phpdt.internal.ui.util;
2
3 import java.util.Vector;
4
5 /**
6  * A string pattern matcher, suppporting * and ? wildcards.
7  */
8 public class StringMatcher {
9         protected String fPattern;
10
11         protected int fLength; // pattern length
12
13         protected boolean fIgnoreWildCards;
14
15         protected boolean fIgnoreCase;
16
17         protected boolean fHasLeadingStar;
18
19         protected boolean fHasTrailingStar;
20
21         protected String fSegments[]; // the given pattern is split into *
22                                                                         // separated segments
23
24         /* boundary value beyond which we don't need to search in the text */
25         protected int fBound = 0;
26
27         protected static final char fSingleWildCard = '\u0000';
28
29         public static class Position {
30                 int start; // inclusive
31
32                 int end; // exclusive
33
34                 public Position(int start, int end) {
35                         this.start = start;
36                         this.end = end;
37                 }
38
39                 public int getStart() {
40                         return start;
41                 }
42
43                 public int getEnd() {
44                         return end;
45                 }
46         }
47
48         /**
49          * StringMatcher constructor takes in a String object that is a simple
50          * pattern which may contain �*� for 0 and many characters and �?� for
51          * exactly one character.
52          * 
53          * Literal '*' and '?' characters must be escaped in the pattern e.g., "\*"
54          * means literal "*", etc.
55          * 
56          * Escaping any other character (including the escape character itself),
57          * just results in that character in the pattern. e.g., "\a" means "a" and
58          * "\\" means "\"
59          * 
60          * If invoking the StringMatcher with string literals in Java, don't forget
61          * escape characters are represented by "\\".
62          * 
63          * @param pattern
64          *            the pattern to match text against
65          * @param ignoreCase
66          *            if true, case is ignored
67          * @param ignoreWildCards
68          *            if true, wild cards and their escape sequences are ignored
69          *            (everything is taken literally).
70          */
71         public StringMatcher(String pattern, boolean ignoreCase,
72                         boolean ignoreWildCards) {
73                 if (pattern == null)
74                         throw new IllegalArgumentException();
75                 fIgnoreCase = ignoreCase;
76                 fIgnoreWildCards = ignoreWildCards;
77                 fPattern = pattern;
78                 fLength = pattern.length();
79
80                 if (fIgnoreWildCards) {
81                         parseNoWildCards();
82                 } else {
83                         parseWildCards();
84                 }
85         }
86
87         /**
88          * Find the first occurrence of the pattern between
89          * <code>start</code)(inclusive) 
90          * and <code>end</code>(exclusive).  
91          * @param <code>text</code>, the String object to search in 
92          * @param <code>start</code>, the starting index of the search range, inclusive
93          * @param <code>end</code>, the ending index of the search range, exclusive
94          * @return an <code>StringMatcher.Position</code> object that keeps the starting 
95          * (inclusive) and ending positions (exclusive) of the first occurrence of the 
96          * pattern in the specified range of the text; return null if not found or subtext
97          * is empty (start==end). A pair of zeros is returned if pattern is empty string
98          * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
99          * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
100          */
101 //      public StringMatcher.Position find(String text, int start, int end) {
102 //              if (text == null)
103 //                      throw new IllegalArgumentException();
104 //
105 //              int tlen = text.length();
106 //              if (start < 0)
107 //                      start = 0;
108 //              if (end > tlen)
109 //                      end = tlen;
110 //              if (end < 0 || start >= end)
111 //                      return null;
112 //              if (fLength == 0)
113 //                      return new Position(start, start);
114 //              if (fIgnoreWildCards) {
115 //                      int x = posIn(text, start, end);
116 //                      if (x < 0)
117 //                              return null;
118 //                      return new Position(x, x + fLength);
119 //              }
120 //
121 //              int segCount = fSegments.length;
122 //              if (segCount == 0)// pattern contains only '*'(s)
123 //                      return new Position(start, end);
124 //
125 //              int curPos = start;
126 //              int matchStart = -1;
127 //              int i;
128 //              for (i = 0; i < segCount && curPos < end; ++i) {
129 //                      String current = fSegments[i];
130 //                      int nextMatch = regExpPosIn(text, curPos, end, current);
131 //                      if (nextMatch < 0)
132 //                              return null;
133 //                      if (i == 0)
134 //                              matchStart = nextMatch;
135 //                      curPos = nextMatch + current.length();
136 //              }
137 //              if (i < segCount)
138 //                      return null;
139 //              return new Position(matchStart, curPos);
140 //      }
141
142         /**
143          * match the given <code>text</code> with the pattern
144          * 
145          * @return true if matched eitherwise false
146          * @param <code>text</code>, a String object
147          */
148         public boolean match(String text) {
149                 return match(text, 0, text.length());
150         }
151
152         /**
153          * Given the starting (inclusive) and the ending (exclusive) positions in
154          * the <code>text</code>, determine if the given substring matches with
155          * aPattern
156          * 
157          * @return true if the specified portion of the text matches the pattern
158          * @param String
159          *            <code>text</code>, a String object that contains the
160          *            substring to match
161          * @param int
162          *            <code>start<code> marks the starting position (inclusive) of the substring
163          * @param int <code>end<code> marks the ending index (exclusive) of the substring
164          */
165         public boolean match(String text, int start, int end) {
166                 if (null == text)
167                         throw new IllegalArgumentException();
168
169                 if (start > end)
170                         return false;
171
172                 if (fIgnoreWildCards)
173                         return (end - start == fLength)
174                                         && fPattern.regionMatches(fIgnoreCase, 0, text, start,
175                                                         fLength);
176                 int segCount = fSegments.length;
177                 if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) // pattern
178                                                                                                                                         // contains
179                                                                                                                                         // only
180                                                                                                                                         // '*'(s)
181                         return true;
182                 if (start == end)
183                         return fLength == 0;
184                 if (fLength == 0)
185                         return start == end;
186
187                 int tlen = text.length();
188                 if (start < 0)
189                         start = 0;
190                 if (end > tlen)
191                         end = tlen;
192
193                 int tCurPos = start;
194                 int bound = end - fBound;
195                 if (bound < 0)
196                         return false;
197                 int i = 0;
198                 String current = fSegments[i];
199                 int segLength = current.length();
200
201                 /* process first segment */
202                 if (!fHasLeadingStar) {
203                         if (!regExpRegionMatches(text, start, current, 0, segLength)) {
204                                 return false;
205                         } else {
206                                 ++i;
207                                 tCurPos = tCurPos + segLength;
208                         }
209                 }
210
211                 /* process middle segments */
212                 while (i < segCount) {
213                         current = fSegments[i];
214                         int currentMatch;
215                         int k = current.indexOf(fSingleWildCard);
216                         if (k < 0) {
217                                 currentMatch = textPosIn(text, tCurPos, end, current);
218                                 if (currentMatch < 0)
219                                         return false;
220                         } else {
221                                 currentMatch = regExpPosIn(text, tCurPos, end, current);
222                                 if (currentMatch < 0)
223                                         return false;
224                         }
225                         tCurPos = currentMatch + current.length();
226                         i++;
227                 }
228
229                 /* process final segment */
230                 if (!fHasTrailingStar && tCurPos != end) {
231                         int clen = current.length();
232                         return regExpRegionMatches(text, end - clen, current, 0, clen);
233                 }
234                 return i == segCount;
235         }
236
237         /**
238          * This method parses the given pattern into segments seperated by wildcard
239          * '*' characters. Since wildcards are not being used in this case, the
240          * pattern consists of a single segment.
241          */
242         private void parseNoWildCards() {
243                 fSegments = new String[1];
244                 fSegments[0] = fPattern;
245                 fBound = fLength;
246         }
247
248         /**
249          * Parses the given pattern into segments seperated by wildcard '*'
250          * characters.
251          * 
252          * @param p,
253          *            a String object that is a simple regular expression with �*�
254          *            and/or �?�
255          */
256         private void parseWildCards() {
257                 if (fPattern.startsWith("*"))//$NON-NLS-1$
258                         fHasLeadingStar = true;
259                 if (fPattern.endsWith("*")) {//$NON-NLS-1$
260                         /* make sure it's not an escaped wildcard */
261                         if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
262                                 fHasTrailingStar = true;
263                         }
264                 }
265
266                 Vector temp = new Vector();
267
268                 int pos = 0;
269                 StringBuffer buf = new StringBuffer();
270                 while (pos < fLength) {
271                         char c = fPattern.charAt(pos++);
272                         switch (c) {
273                         case '\\':
274                                 if (pos >= fLength) {
275                                         buf.append(c);
276                                 } else {
277                                         char next = fPattern.charAt(pos++);
278                                         /* if it's an escape sequence */
279                                         if (next == '*' || next == '?' || next == '\\') {
280                                                 buf.append(next);
281                                         } else {
282                                                 /* not an escape sequence, just insert literally */
283                                                 buf.append(c);
284                                                 buf.append(next);
285                                         }
286                                 }
287                                 break;
288                         case '*':
289                                 if (buf.length() > 0) {
290                                         /* new segment */
291                                         temp.addElement(buf.toString());
292                                         fBound += buf.length();
293                                         buf.setLength(0);
294                                 }
295                                 break;
296                         case '?':
297                                 /* append special character representing single match wildcard */
298                                 buf.append(fSingleWildCard);
299                                 break;
300                         default:
301                                 buf.append(c);
302                         }
303                 }
304
305                 /* add last buffer to segment list */
306                 if (buf.length() > 0) {
307                         temp.addElement(buf.toString());
308                         fBound += buf.length();
309                 }
310
311                 fSegments = new String[temp.size()];
312                 temp.copyInto(fSegments);
313         }
314
315         /**
316          * @param <code>text</code>, a string which contains no wildcard
317          * @param <code>start</code>, the starting index in the text for search,
318          *            inclusive
319          * @param <code>end</code>, the stopping point of search, exclusive
320          * @return the starting index in the text of the pattern , or -1 if not
321          *         found
322          */
323 //      protected int posIn(String text, int start, int end) {// no wild card in
324 //                                                                                                                      // pattern
325 //              int max = end - fLength;
326 //
327 //              if (!fIgnoreCase) {
328 //                      int i = text.indexOf(fPattern, start);
329 //                      if (i == -1 || i > max)
330 //                              return -1;
331 //                      return i;
332 //              }
333 //
334 //              for (int i = start; i <= max; ++i) {
335 //                      if (text.regionMatches(true, i, fPattern, 0, fLength))
336 //                              return i;
337 //              }
338 //
339 //              return -1;
340 //      }
341
342         /**
343          * @param <code>text</code>, a simple regular expression that may only
344          *            contain '?'(s)
345          * @param <code>start</code>, the starting index in the text for search,
346          *            inclusive
347          * @param <code>end</code>, the stopping point of search, exclusive
348          * @param <code>p</code>, a simple regular expression that may contains '?'
349          * @param <code>caseIgnored</code>, wether the pattern is not casesensitive
350          * @return the starting index in the text of the pattern , or -1 if not
351          *         found
352          */
353         protected int regExpPosIn(String text, int start, int end, String p) {
354                 int plen = p.length();
355
356                 int max = end - plen;
357                 for (int i = start; i <= max; ++i) {
358                         if (regExpRegionMatches(text, i, p, 0, plen))
359                                 return i;
360                 }
361                 return -1;
362         }
363
364         /**
365          * 
366          * @return boolean
367          * @param <code>text</code>, a String to match
368          * @param <code>start</code>, int that indicates the starting index of
369          *            match, inclusive
370          * @param <code>end</code> int that indicates the ending index of match,
371          *            exclusive
372          * @param <code>p</code>, String, String, a simple regular expression that
373          *            may contain '?'
374          * @param <code>ignoreCase</code>, boolean indicating wether code>p</code>
375          *            is case sensitive
376          */
377         protected boolean regExpRegionMatches(String text, int tStart, String p,
378                         int pStart, int plen) {
379                 while (plen-- > 0) {
380                         char tchar = text.charAt(tStart++);
381                         char pchar = p.charAt(pStart++);
382
383                         /* process wild cards */
384                         if (!fIgnoreWildCards) {
385                                 /* skip single wild cards */
386                                 if (pchar == fSingleWildCard) {
387                                         continue;
388                                 }
389                         }
390                         if (pchar == tchar)
391                                 continue;
392                         if (fIgnoreCase) {
393                                 if (Character.toUpperCase(tchar) == Character
394                                                 .toUpperCase(pchar))
395                                         continue;
396                                 // comparing after converting to upper case doesn't handle all
397                                 // cases;
398                                 // also compare after converting to lower case
399                                 if (Character.toLowerCase(tchar) == Character
400                                                 .toLowerCase(pchar))
401                                         continue;
402                         }
403                         return false;
404                 }
405                 return true;
406         }
407
408         /**
409          * @param <code>text</code>, the string to match
410          * @param <code>start</code>, the starting index in the text for search,
411          *            inclusive
412          * @param <code>end</code>, the stopping point of search, exclusive
413          * @param code>p
414          *            </code>, a string that has no wildcard
415          * @param <code>
416          *            ignoreCase</code>, boolean indicating wether code>p</code>
417          *            is case sensitive
418          * @return the starting index in the text of the pattern , or -1 if not
419          *         found
420          */
421         protected int textPosIn(String text, int start, int end, String p) {
422
423                 int plen = p.length();
424                 int max = end - plen;
425
426                 if (!fIgnoreCase) {
427                         int i = text.indexOf(p, start);
428                         if (i == -1 || i > max)
429                                 return -1;
430                         return i;
431                 }
432
433                 for (int i = start; i <= max; ++i) {
434                         if (text.regionMatches(true, i, p, 0, plen))
435                                 return i;
436                 }
437
438                 return -1;
439         }
440 }