inital plugin from webtools project
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / ui / util / StringMatcher.java
1 package net.sourceforge.phpdt.internal.ui.util;
2
3 import java.util.Vector;
4
5
6 /**
7  * A string pattern matcher, suppporting * and ? wildcards.
8  */
9 public class StringMatcher  {
10         protected String fPattern;
11         protected int fLength; // pattern length
12         protected boolean fIgnoreWildCards;
13         protected boolean fIgnoreCase;
14         protected boolean fHasLeadingStar;
15         protected boolean fHasTrailingStar;
16         protected String fSegments[]; //the given pattern is split into * separated segments
17
18         /* boundary value beyond which we don't need to search in the text */
19         protected int fBound= 0;
20         
21
22         protected static final char fSingleWildCard= '\u0000';
23         
24         public static class Position {
25                 int start; //inclusive
26                 int end; //exclusive
27                 public Position(int start, int end) {
28                         this.start= start;
29                         this.end= end;
30                 }
31                 public int getStart() {
32                         return start;
33                 }
34                 public int getEnd() {
35                         return end;
36                 }
37         }
38         /**
39          * StringMatcher constructor takes in a String object that is a simple 
40          * pattern which may contain ‘*’ for 0 and many characters and
41          * ‘?’ for exactly one character.  
42          *
43          * Literal '*' and '?' characters must be escaped in the pattern 
44          * e.g., "\*" means literal "*", etc.
45          *
46          * Escaping any other character (including the escape character itself), 
47          * just results in that character in the pattern.
48          * e.g., "\a" means "a" and "\\" means "\"
49          *
50          * If invoking the StringMatcher with string literals in Java, don't forget
51          * escape characters are represented by "\\".
52          *
53          * @param pattern the pattern to match text against
54          * @param ignoreCase if true, case is ignored
55          * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
56          *                (everything is taken literally).
57          */
58         public StringMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
59                 if (pattern == null)
60                         throw new IllegalArgumentException();
61                 fIgnoreCase= ignoreCase;
62                 fIgnoreWildCards= ignoreWildCards;
63                 fPattern= pattern;
64                 fLength= pattern.length();
65                 
66                 if (fIgnoreWildCards) {
67                         parseNoWildCards();
68                 } else {
69                         parseWildCards();
70                 }
71         }
72         /**
73          * Find the first occurrence of the pattern between <code>start</code)(inclusive) 
74          * and <code>end</code>(exclusive).  
75          * @param <code>text</code>, the String object to search in 
76          * @param <code>start</code>, the starting index of the search range, inclusive
77          * @param <code>end</code>, the ending index of the search range, exclusive
78          * @return an <code>StringMatcher.Position</code> object that keeps the starting 
79          * (inclusive) and ending positions (exclusive) of the first occurrence of the 
80          * pattern in the specified range of the text; return null if not found or subtext
81          * is empty (start==end). A pair of zeros is returned if pattern is empty string
82          * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
83          * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
84          */
85         public StringMatcher.Position find(String text, int start, int end) {
86                 if (text == null)
87                         throw new IllegalArgumentException();
88                         
89                 int tlen= text.length();
90                 if (start < 0)
91                         start= 0;
92                 if (end > tlen)
93                         end= tlen;
94                 if (end < 0 ||start >= end )
95                         return null;
96                 if (fLength == 0)
97                         return new Position(start, start);
98                 if (fIgnoreWildCards) {
99                         int x= posIn(text, start, end);
100                         if (x < 0)
101                                 return null;
102                         return new Position(x, x+fLength);
103                 }
104
105                 int segCount= fSegments.length;
106                 if (segCount == 0)//pattern contains only '*'(s)
107                         return new Position (start, end);
108                                         
109                 int curPos= start;
110                 int matchStart= -1;
111                 int i;
112                 for (i= 0; i < segCount && curPos < end; ++i) {
113                         String current= fSegments[i];
114                         int nextMatch= regExpPosIn(text, curPos, end, current);
115                         if (nextMatch < 0 )
116                                 return null;
117                         if(i == 0)
118                                 matchStart= nextMatch;
119                         curPos= nextMatch + current.length();
120                 }
121                 if (i < segCount)
122                         return null;
123                 return new Position(matchStart, curPos);
124         }
125         /**
126          * match the given <code>text</code> with the pattern 
127          * @return true if matched eitherwise false
128          * @param <code>text</code>, a String object 
129          */
130         public boolean match(String text) {
131                 return match(text, 0, text.length());
132         }
133         /**
134          * Given the starting (inclusive) and the ending (exclusive) positions in the   
135          * <code>text</code>, determine if the given substring matches with aPattern  
136          * @return true if the specified portion of the text matches the pattern
137          * @param String <code>text</code>, a String object that contains the substring to match 
138          * @param int <code>start<code> marks the starting position (inclusive) of the substring
139          * @param int <code>end<code> marks the ending index (exclusive) of the substring 
140          */
141         public boolean match(String text, int start, int end) {
142                 if (null == text)
143                         throw new IllegalArgumentException();
144                         
145                 if (start > end)
146                         return false;
147                 
148                 if (fIgnoreWildCards)
149                         return (end - start == fLength) && fPattern.regionMatches(fIgnoreCase, 0, text, start, fLength);
150                 int segCount= fSegments.length;
151                 if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar))  // pattern contains only '*'(s)
152                         return true;
153                 if (start == end)
154                         return fLength == 0;
155                 if (fLength == 0)
156                         return start == end;    
157                  
158                 int tlen= text.length();
159                 if (start < 0)
160                         start= 0;
161                 if (end > tlen)
162                         end= tlen; 
163                                         
164                 int tCurPos= start;
165                 int bound= end - fBound;
166                 if ( bound < 0)
167                         return false;
168                 int i=0;
169                 String current= fSegments[i];
170                 int segLength= current.length();
171
172                 /* process first segment */
173                 if (!fHasLeadingStar){ 
174                         if(!regExpRegionMatches(text, start, current, 0, segLength)) {
175                                 return false;
176                         } else {
177                                 ++i;
178                                 tCurPos= tCurPos + segLength;
179                         }
180                 }
181
182                 /* process middle segments */   
183                 while (i < segCount) {
184                         current= fSegments[i];
185                         int currentMatch;
186                         int k= current.indexOf(fSingleWildCard);
187                         if (k < 0) {
188                                 currentMatch= textPosIn(text, tCurPos, end, current);
189                                 if (currentMatch < 0)
190                                         return false;
191                         } else { 
192                                 currentMatch= regExpPosIn(text, tCurPos, end, current);
193                                 if (currentMatch < 0)
194                                         return false;
195                         }
196                         tCurPos= currentMatch + current.length();
197                         i++;
198                 }
199
200                 /* process final segment */
201                 if (!fHasTrailingStar && tCurPos != end) {
202                         int clen= current.length();
203                         return regExpRegionMatches(text, end - clen, current, 0, clen);
204                 }
205                 return i == segCount ;
206         }
207         /**
208          * This method parses the given pattern into segments seperated by wildcard '*' characters.
209          * Since wildcards are not being used in this case, the pattern consists of a single segment.
210          */
211         private void parseNoWildCards() {
212                 fSegments= new String[1];
213                 fSegments[0]= fPattern;
214                 fBound= fLength;
215         }
216         /**
217          * Parses the given pattern into segments seperated by wildcard '*' characters.
218          * @param p, a String object that is a simple regular expression with ‘*’ and/or ‘?’
219          */
220         private void parseWildCards() {
221                 if(fPattern.startsWith("*"))//$NON-NLS-1$
222                         fHasLeadingStar= true;
223                 if(fPattern.endsWith("*")) {//$NON-NLS-1$
224                         /* make sure it's not an escaped wildcard */
225                         if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
226                                 fHasTrailingStar= true;
227                         }
228                 }
229
230                 Vector temp= new Vector();
231
232                 int pos= 0;
233                 StringBuffer buf= new StringBuffer();
234                 while (pos < fLength) {
235                         char c= fPattern.charAt(pos++);
236                         switch (c) {
237                                 case '\\':
238                                         if (pos >= fLength) {
239                                                 buf.append(c);
240                                         } else {
241                                                 char next= fPattern.charAt(pos++);
242                                                 /* if it's an escape sequence */
243                                                 if (next == '*' || next == '?' || next == '\\') {
244                                                         buf.append(next);
245                                                 } else {
246                                                         /* not an escape sequence, just insert literally */
247                                                         buf.append(c);
248                                                         buf.append(next);
249                                                 }
250                                         }
251                                 break;
252                                 case '*':
253                                         if (buf.length() > 0) {
254                                                 /* new segment */
255                                                 temp.addElement(buf.toString());
256                                                 fBound += buf.length();
257                                                 buf.setLength(0);
258                                         }
259                                 break;
260                                 case '?':
261                                         /* append special character representing single match wildcard */
262                                         buf.append(fSingleWildCard);
263                                 break;
264                                 default:
265                                         buf.append(c);
266                         }
267                 }
268
269                 /* add last buffer to segment list */
270                 if (buf.length() > 0) {
271                         temp.addElement(buf.toString());
272                         fBound += buf.length();
273                 }
274                         
275                 fSegments= new String[temp.size()];
276                 temp.copyInto(fSegments);
277         }
278         /** 
279          * @param <code>text</code>, a string which contains no wildcard
280          * @param <code>start</code>, the starting index in the text for search, inclusive
281          * @param <code>end</code>, the stopping point of search, exclusive
282          * @return the starting index in the text of the pattern , or -1 if not found 
283          */
284         protected int posIn(String text, int start, int end) {//no wild card in pattern
285                 int max= end - fLength;
286                 
287                 if (!fIgnoreCase) {
288                         int i= text.indexOf(fPattern, start);
289                         if (i == -1 || i > max)
290                                 return -1;
291                         return i;
292                 }
293                 
294                 for (int i= start; i <= max; ++i) {
295                         if (text.regionMatches(true, i, fPattern, 0, fLength))
296                                 return i;
297                 }
298                 
299                 return -1;
300         }
301         /** 
302          * @param <code>text</code>, a simple regular expression that may only contain '?'(s)
303          * @param <code>start</code>, the starting index in the text for search, inclusive
304          * @param <code>end</code>, the stopping point of search, exclusive
305          * @param <code>p</code>, a simple regular expression that may contains '?'
306          * @param <code>caseIgnored</code>, wether the pattern is not casesensitive
307          * @return the starting index in the text of the pattern , or -1 if not found 
308          */
309         protected int regExpPosIn(String text, int start, int end, String p) {
310                 int plen= p.length();
311                 
312                 int max= end - plen;
313                 for (int i= start; i <= max; ++i) {
314                         if (regExpRegionMatches(text, i, p, 0, plen))
315                                 return i;
316                 }
317                 return -1;
318         }
319         /**
320          * 
321          * @return boolean
322          * @param <code>text</code>, a String to match
323          * @param <code>start</code>, int that indicates the starting index of match, inclusive
324          * @param <code>end</code> int that indicates the ending index of match, exclusive
325          * @param <code>p</code>, String,  String, a simple regular expression that may contain '?'
326          * @param <code>ignoreCase</code>, boolean indicating wether code>p</code> is case sensitive
327          */
328         protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
329                 while (plen-- > 0) {
330                         char tchar= text.charAt(tStart++);
331                         char pchar= p.charAt(pStart++);
332
333                         /* process wild cards */
334                         if (!fIgnoreWildCards) {
335                                 /* skip single wild cards */
336                                 if (pchar == fSingleWildCard) {
337                                         continue;
338                                 }
339                         }
340                         if (pchar == tchar)
341                                 continue;
342                         if (fIgnoreCase) {
343                                 if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar))
344                                         continue;
345                                 // comparing after converting to upper case doesn't handle all cases;
346                                 // also compare after converting to lower case
347                                 if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar))
348                                         continue;
349                         }
350                         return false;
351                 }
352                 return true;
353         }
354         /** 
355          * @param <code>text</code>, the string to match
356          * @param <code>start</code>, the starting index in the text for search, inclusive
357          * @param <code>end</code>, the stopping point of search, exclusive
358          * @param code>p</code>, a string that has no wildcard
359          * @param <code>ignoreCase</code>, boolean indicating wether code>p</code> is case sensitive
360          * @return the starting index in the text of the pattern , or -1 if not found 
361          */
362         protected int textPosIn(String text, int start, int end, String p) { 
363                 
364                 int plen= p.length();
365                 int max= end - plen;
366                 
367                 if (!fIgnoreCase) {
368                         int i= text.indexOf(p, start);
369                         if (i == -1 || i > max)
370                                 return -1;
371                         return i;
372                 }
373                 
374                 for (int i= start; i <= max; ++i) {
375                         if (text.regionMatches(true, i, p, 0, plen))
376                                 return i;
377                 }
378                 
379                 return -1;
380         }
381 }