988f842e1a4069299b89db0b01edf08d94a83ea1
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / externaltools / model / StringMatcher.java
1 package net.sourceforge.phpdt.externaltools.model;
2 /**********************************************************************
3 Copyright (c) 2000, 2002 IBM Corp.  All rights reserved.
4 This file is made available under the terms of the Common Public License v1.0
5 which accompanies this distribution, and is available at
6 http://www.eclipse.org/legal/cpl-v10.html
7 **********************************************************************/
8
9 import java.util.Vector;
10
11 /**
12  * Copied from org.eclipse.jdt.internal.ui.util.StringMatcher
13  * 
14  *  A string pattern matcher, suppporting * and ? wildcards.
15  */
16 public class StringMatcher {
17         protected String fPattern;
18         protected int fLength; // pattern length
19         protected boolean fIgnoreWildCards;
20         protected boolean fIgnoreCase;
21         protected boolean fHasLeadingStar;
22         protected boolean fHasTrailingStar;
23         protected String fSegments[]; //the given pattern is split into * separated segments
24
25         /* boundary value beyond which we don't need to search in the text */
26         protected int fBound= 0;
27
28
29         protected static final char fSingleWildCard= '\u0000';
30
31         public static class Position {
32                 int start; //inclusive
33                 int end; //exclusive
34                 public Position(int start, int end) {
35                         this.start= start;
36                         this.end= end;
37                 }
38                 public int getStart() {
39                         return start;
40                 }
41                 public int getEnd() {
42                         return end;
43                 }
44         }
45         /**
46          * StringMatcher constructor takes in a String object that is a simple
47          * pattern which may contain ‘*’ for 0 and many characters and
48          * ‘?’ for exactly one character.
49          *
50          * Literal '*' and '?' characters must be escaped in the pattern
51          * e.g., "\*" means literal "*", etc.
52          *
53          * Escaping any other character (including the escape character itself),
54          * just results in that character in the pattern.
55          * e.g., "\a" means "a" and "\\" means "\"
56          *
57          * If invoking the StringMatcher with string literals in Java, don't forget
58          * escape characters are represented by "\\".
59          *
60          * @param pattern the pattern to match text against
61          * @param ignoreCase if true, case is ignored
62          * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
63          *                (everything is taken literally).
64          */
65         public StringMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
66                 if (pattern == null)
67                         throw new IllegalArgumentException();
68                 fIgnoreCase= ignoreCase;
69                 fIgnoreWildCards= ignoreWildCards;
70                 fPattern= pattern;
71                 fLength= pattern.length();
72
73                 if (fIgnoreWildCards) {
74                         parseNoWildCards();
75                 } else {
76                         parseWildCards();
77                 }
78         }
79         /**
80          * Find the first occurrence of the pattern between <code>start</code)(inclusive)
81          * and <code>end</code>(exclusive).
82          * @param <code>text</code>, the String object to search in
83          * @param <code>start</code>, the starting index of the search range, inclusive
84          * @param <code>end</code>, the ending index of the search range, exclusive
85          * @return an <code>StringMatcher.Position</code> object that keeps the starting
86          * (inclusive) and ending positions (exclusive) of the first occurrence of the
87          * pattern in the specified range of the text; return null if not found or subtext
88          * is empty (start==end). A pair of zeros is returned if pattern is empty string
89          * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
90          * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
91          */
92         public StringMatcher.Position find(String text, int start, int end) {
93                 if (text == null)
94                         throw new IllegalArgumentException();
95
96                 int tlen= text.length();
97                 if (start < 0)
98                         start= 0;
99                 if (end > tlen)
100                         end= tlen;
101                 if (end < 0 ||start >= end )
102                         return null;
103                 if (fLength == 0)
104                         return new Position(start, start);
105                 if (fIgnoreWildCards) {
106                         int x= posIn(text, start, end);
107                         if (x < 0)
108                                 return null;
109                         return new Position(x, x+fLength);
110                 }
111
112                 int segCount= fSegments.length;
113                 if (segCount == 0)//pattern contains only '*'(s)
114                         return new Position (start, end);
115
116                 int curPos= start;
117                 int matchStart= -1;
118                 int i;
119                 for (i= 0; i < segCount && curPos < end; ++i) {
120                         String current= fSegments[i];
121                         int nextMatch= regExpPosIn(text, curPos, end, current);
122                         if (nextMatch < 0 )
123                                 return null;
124                         if(i == 0)
125                                 matchStart= nextMatch;
126                         curPos= nextMatch + current.length();
127                 }
128                 if (i < segCount)
129                         return null;
130                 return new Position(matchStart, curPos);
131         }
132         /**
133          * match the given <code>text</code> with the pattern
134          * @return true if matched eitherwise false
135          * @param <code>text</code>, a String object
136          */
137         public boolean match(String text) {
138                 return match(text, 0, text.length());
139         }
140         /**
141          * Given the starting (inclusive) and the ending (exclusive) positions in the
142          * <code>text</code>, determine if the given substring matches with aPattern
143          * @return true if the specified portion of the text matches the pattern
144          * @param String <code>text</code>, a String object that contains the substring to match
145          * @param int <code>start<code> marks the starting position (inclusive) of the substring
146          * @param int <code>end<code> marks the ending index (exclusive) of the substring
147          */
148         public boolean match(String text, int start, int end) {
149                 if (null == text)
150                         throw new IllegalArgumentException();
151
152                 if (start > end)
153                         return false;
154
155                 if (fIgnoreWildCards)
156                         return (end - start == fLength) && fPattern.regionMatches(fIgnoreCase, 0, text, start, fLength);
157                 int segCount= fSegments.length;
158                 if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar))  // pattern contains only '*'(s)
159                         return true;
160                 if (start == end)
161                         return fLength == 0;
162                 if (fLength == 0)
163                         return start == end;
164
165                 int tlen= text.length();
166                 if (start < 0)
167                         start= 0;
168                 if (end > tlen)
169                         end= tlen;
170
171                 int tCurPos= start;
172                 int bound= end - fBound;
173                 if ( bound < 0)
174                         return false;
175                 int i=0;
176                 String current= fSegments[i];
177                 int segLength= current.length();
178
179                 /* process first segment */
180                 if (!fHasLeadingStar){
181                         if(!regExpRegionMatches(text, start, current, 0, segLength)) {
182                                 return false;
183                         } else {
184                                 ++i;
185                                 tCurPos= tCurPos + segLength;
186                         }
187                 }
188
189                 /* process middle segments */
190                 while (i < segCount) {
191                         current= fSegments[i];
192                         int currentMatch;
193                         int k= current.indexOf(fSingleWildCard);
194                         if (k < 0) {
195                                 currentMatch= textPosIn(text, tCurPos, end, current);
196                                 if (currentMatch < 0)
197                                         return false;
198                         } else {
199                                 currentMatch= regExpPosIn(text, tCurPos, end, current);
200                                 if (currentMatch < 0)
201                                         return false;
202                         }
203                         tCurPos= currentMatch + current.length();
204                         i++;
205                 }
206
207                 /* process final segment */
208                 if (!fHasTrailingStar && tCurPos != end) {
209                         int clen= current.length();
210                         return regExpRegionMatches(text, end - clen, current, 0, clen);
211                 }
212                 return i == segCount ;
213         }
214         /**
215          * This method parses the given pattern into segments seperated by wildcard '*' characters.
216          * Since wildcards are not being used in this case, the pattern consists of a single segment.
217          */
218         private void parseNoWildCards() {
219                 fSegments= new String[1];
220                 fSegments[0]= fPattern;
221                 fBound= fLength;
222         }
223         /**
224          * Parses the given pattern into segments seperated by wildcard '*' characters.
225          * @param p, a String object that is a simple regular expression with ‘*’ and/or ‘?’
226          */
227         private void parseWildCards() {
228                 if(fPattern.startsWith("*"))//$NON-NLS-1$
229                         fHasLeadingStar= true;
230                 if(fPattern.endsWith("*")) {//$NON-NLS-1$
231                         /* make sure it's not an escaped wildcard */
232                         if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
233                                 fHasTrailingStar= true;
234                         }
235                 }
236
237                 Vector temp= new Vector();
238
239                 int pos= 0;
240                 StringBuffer buf= new StringBuffer();
241                 while (pos < fLength) {
242                         char c= fPattern.charAt(pos++);
243                         switch (c) {
244                                 case '\\':
245                                         if (pos >= fLength) {
246                                                 buf.append(c);
247                                         } else {
248                                                 char next= fPattern.charAt(pos++);
249                                                 /* if it's an escape sequence */
250                                                 if (next == '*' || next == '?' || next == '\\') {
251                                                         buf.append(next);
252                                                 } else {
253                                                         /* not an escape sequence, just insert literally */
254                                                         buf.append(c);
255                                                         buf.append(next);
256                                                 }
257                                         }
258                                 break;
259                                 case '*':
260                                         if (buf.length() > 0) {
261                                                 /* new segment */
262                                                 temp.addElement(buf.toString());
263                                                 fBound += buf.length();
264                                                 buf.setLength(0);
265                                         }
266                                 break;
267                                 case '?':
268                                         /* append special character representing single match wildcard */
269                                         buf.append(fSingleWildCard);
270                                 break;
271                                 default:
272                                         buf.append(c);
273                         }
274                 }
275
276                 /* add last buffer to segment list */
277                 if (buf.length() > 0) {
278                         temp.addElement(buf.toString());
279                         fBound += buf.length();
280                 }
281
282                 fSegments= new String[temp.size()];
283                 temp.copyInto(fSegments);
284         }
285         /**
286          * @param <code>text</code>, a string which contains no wildcard
287          * @param <code>start</code>, the starting index in the text for search, inclusive
288          * @param <code>end</code>, the stopping point of search, exclusive
289          * @return the starting index in the text of the pattern , or -1 if not found
290          */
291         protected int posIn(String text, int start, int end) {//no wild card in pattern
292                 int max= end - fLength;
293
294                 if (!fIgnoreCase) {
295                         int i= text.indexOf(fPattern, start);
296                         if (i == -1 || i > max)
297                                 return -1;
298                         return i;
299                 }
300
301                 for (int i= start; i <= max; ++i) {
302                         if (text.regionMatches(true, i, fPattern, 0, fLength))
303                                 return i;
304                 }
305
306                 return -1;
307         }
308         /**
309          * @param <code>text</code>, a simple regular expression that may only contain '?'(s)
310          * @param <code>start</code>, the starting index in the text for search, inclusive
311          * @param <code>end</code>, the stopping point of search, exclusive
312          * @param <code>p</code>, a simple regular expression that may contains '?'
313          * @param <code>caseIgnored</code>, wether the pattern is not casesensitive
314          * @return the starting index in the text of the pattern , or -1 if not found
315          */
316         protected int regExpPosIn(String text, int start, int end, String p) {
317                 int plen= p.length();
318
319                 int max= end - plen;
320                 for (int i= start; i <= max; ++i) {
321                         if (regExpRegionMatches(text, i, p, 0, plen))
322                                 return i;
323                 }
324                 return -1;
325         }
326         /**
327          *
328          * @return boolean
329          * @param <code>text</code>, a String to match
330          * @param <code>start</code>, int that indicates the starting index of match, inclusive
331          * @param <code>end</code> int that indicates the ending index of match, exclusive
332          * @param <code>p</code>, String,  String, a simple regular expression that may contain '?'
333          * @param <code>ignoreCase</code>, boolean indicating wether code>p</code> is case sensitive
334          */
335         protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
336                 while (plen-- > 0) {
337                         char tchar= text.charAt(tStart++);
338                         char pchar= p.charAt(pStart++);
339
340                         /* process wild cards */
341                         if (!fIgnoreWildCards) {
342                                 /* skip single wild cards */
343                                 if (pchar == fSingleWildCard) {
344                                         continue;
345                                 }
346                         }
347                         if (pchar == tchar)
348                                 continue;
349                         if (fIgnoreCase) {
350                                 if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar))
351                                         continue;
352                                 // comparing after converting to upper case doesn't handle all cases;
353                                 // also compare after converting to lower case
354                                 if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar))
355                                         continue;
356                         }
357                         return false;
358                 }
359                 return true;
360         }
361         /**
362          * @param <code>text</code>, the string to match
363          * @param <code>start</code>, the starting index in the text for search, inclusive
364          * @param <code>end</code>, the stopping point of search, exclusive
365          * @param code>p</code>, a string that has no wildcard
366          * @param <code>ignoreCase</code>, boolean indicating wether code>p</code> is case sensitive
367          * @return the starting index in the text of the pattern , or -1 if not found
368          */
369         protected int textPosIn(String text, int start, int end, String p) {
370
371                 int plen= p.length();
372                 int max= end - plen;
373
374                 if (!fIgnoreCase) {
375                         int i= text.indexOf(p, start);
376                         if (i == -1 || i > max)
377                                 return -1;
378                         return i;
379                 }
380
381                 for (int i= start; i <= max; ++i) {
382                         if (text.regionMatches(true, i, p, 0, plen))
383                                 return i;
384                 }
385
386                 return -1;
387         }
388 }