14a9b0becce32de8b5b6a478267f49108930372b
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / ui / text / JavaBreakIterator.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2004 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  *******************************************************************************/
11 package net.sourceforge.phpdt.internal.ui.text;
12
13 import java.text.BreakIterator;
14 import java.text.CharacterIterator;
15
16 import org.eclipse.jface.text.Assert;
17
18
19 /**
20  * A java break iterator. It returns all breaks, including before and after 
21  * whitespace, and it returns all camelcase breaks.
22  * <p>
23  * A line break may be any of "\n", "\r", "\r\n", "\n\r".
24  * </p>
25  * 
26  * @since 3.0
27  */
28 public class JavaBreakIterator extends BreakIterator {
29
30         /**
31          * A run of common characters.
32          */
33         protected static abstract class Run {
34                 /** The length of this run. */
35                 protected int length;
36                 
37                 public Run() {
38                         init();
39                 }
40                 
41                 /**
42                  * Returns <code>true</code> if this run consumes <code>ch</code>,
43                  * <code>false</code> otherwise. If <code>true</code> is returned,
44                  * the length of the receiver is adjusted accordingly.
45                  * 
46                  * @param ch the character to test
47                  * @return <code>true</code> if <code>ch</code> was consumed
48                  */
49                 protected boolean consume(char ch) {
50                         if (isValid(ch)) {
51                                 length++;
52                                 return true;
53                         }
54                         return false;
55                 }
56                 
57                 /**
58                  * Whether this run accepts that character; does not update state. Called
59                  * from the default implementation of <code>consume</code>.
60                  * 
61                  * @param ch the character to test
62                  * @return <code>true</code> if <code>ch</code> is accepted
63                  */
64                 protected abstract boolean isValid(char ch);
65                 
66                 /**
67                  * Resets this run to the initial state.
68                  */
69                 protected void init() {
70                         length= 0;
71                 }
72         }
73         
74         static final class Whitespace extends Run {
75                 protected boolean isValid(char ch) {
76                         return Character.isWhitespace(ch) && ch != '\n' && ch != '\r';
77                 }
78         }
79         
80         static final class LineDelimiter extends Run {
81                 /** State: INIT -> delimiter -> EXIT. */
82                 private char fState;
83                 private static final char INIT= '\0';
84                 private static final char EXIT= '\1';
85                 
86                 /*
87                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#init()
88                  */
89                 protected void init() {
90                         super.init();
91                         fState= INIT;
92                 }
93                 
94                 /*
95                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#consume(char)
96                  */
97                 protected boolean consume(char ch) {
98                         if (!isValid(ch) || fState == EXIT)
99                                 return false;
100                         
101                         if (fState == INIT) {
102                                 fState= ch;
103                                 length++;
104                                 return true;
105                         } else if (fState != ch) {
106                                 fState= EXIT;
107                                 length++;
108                                 return true;
109                         } else {
110                                 return false;
111                         }
112                 }
113                 
114                 protected boolean isValid(char ch) {
115                         return ch == '\n' || ch == '\r';
116                 }
117         }
118         
119         static final class Identifier extends Run {
120                 /*
121                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
122                  */
123                 protected boolean isValid(char ch) {
124                         return Character.isJavaIdentifierPart(ch);
125                 }
126         }
127         
128         static final class CamelCaseIdentifier extends Run {
129                 /* states */
130                 private static final int S_INIT= 0;
131                 private static final int S_LOWER= 1;
132                 private static final int S_ONE_CAP= 2;
133                 private static final int S_ALL_CAPS= 3;
134                 private static final int S_EXIT= 4;
135                 private static final int S_EXIT_MINUS_ONE= 5;
136
137                 /* character types */
138                 private static final int K_INVALID= 0;
139                 private static final int K_LOWER= 1;
140                 private static final int K_UPPER= 2;
141                 private static final int K_OTHER= 3;
142                 
143                 private int fState;
144                 
145                 private final static int[][] MATRIX= new int[][] {
146                                 // K_INVALID, K_LOWER,           K_UPPER,    K_OTHER
147                                 {  S_EXIT,    S_LOWER,           S_ONE_CAP,  S_LOWER }, // S_INIT
148                                 {  S_EXIT,    S_LOWER,           S_EXIT,     S_LOWER }, // S_LOWER
149                                 {  S_EXIT,    S_LOWER,           S_ALL_CAPS, S_LOWER }, // S_ONE_CAP
150                                 {  S_EXIT,    S_EXIT_MINUS_ONE,  S_ALL_CAPS, S_LOWER }, // S_ALL_CAPS
151                 };
152                 
153                 /*
154                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#init()
155                  */
156                 protected void init() {
157                         super.init();
158                         fState= S_INIT;
159                 }
160                 
161                 /*
162                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#consumes(char)
163                  */
164                 protected boolean consume(char ch) {
165                         int kind= getKind(ch);
166                         fState= MATRIX[fState][kind];
167                         switch (fState) {
168                                 case S_LOWER:
169                                 case S_ONE_CAP:
170                                 case S_ALL_CAPS:
171                                         length++;
172                                         return true;
173                                 case S_EXIT:
174                                         return false;
175                                 case S_EXIT_MINUS_ONE:
176                                         length--;
177                                         return false;
178                                 default:
179                                         Assert.isTrue(false);
180                                         return false;
181                         }
182                 }
183                 
184                 /**
185                  * Determines the kind of a character.
186                  * 
187                  * @param ch the character to test
188                  */
189                 private int getKind(char ch) {
190                         if (Character.isUpperCase(ch))
191                                 return K_UPPER;
192                         if (Character.isLowerCase(ch))
193                                 return K_LOWER;
194                         if (Character.isJavaIdentifierPart(ch)) // _, digits...
195                                 return K_OTHER;
196                         return K_INVALID;
197                 }
198
199                 /*
200                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
201                  */
202                 protected boolean isValid(char ch) {
203                         return Character.isJavaIdentifierPart(ch);
204                 }
205         }
206
207         static final class Other extends Run {
208                 /*
209                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
210                  */
211                 protected boolean isValid(char ch) {
212                         return !Character.isWhitespace(ch) && !Character.isJavaIdentifierPart(ch);
213                 }
214         }
215         
216         private static final Run WHITESPACE= new Whitespace();
217         private static final Run DELIMITER= new LineDelimiter();
218         private static final Run CAMELCASE= new CamelCaseIdentifier(); // new Identifier();
219         private static final Run OTHER= new Other();
220         
221         /** The platform break iterator (word instance) used as a base. */ 
222         protected final BreakIterator fIterator;
223         /** The text we operate on. */
224         protected CharSequence fText;
225         /** our current position for the stateful methods. */
226         private int fIndex;
227         
228         
229         /**
230          * Creates a new break iterator.
231          */
232         public JavaBreakIterator() {
233                 fIterator= BreakIterator.getWordInstance();
234                 fIndex= fIterator.current();
235         }
236
237         /*
238          * @see java.text.BreakIterator#current()
239          */
240         public int current() {
241                 return fIndex;
242         }
243
244         /*
245          * @see java.text.BreakIterator#first()
246          */
247         public int first() {
248                 fIndex= fIterator.first();
249                 return fIndex;
250         }
251
252         /*
253          * @see java.text.BreakIterator#following(int)
254          */
255         public int following(int offset) {
256                 // work around too eager IAEs in standard impl
257                 if (offset == getText().getEndIndex())
258                         return DONE;
259                 
260                 int next= fIterator.following(offset);
261                 if (next == DONE)
262                         return DONE;
263                 
264                 // TODO deal with complex script word boundaries
265                 // Math.min(offset + run.length, next) does not work
266                 // since wordinstance considers _ as boundaries
267                 // seems to work fine, however
268                 Run run= consumeRun(offset);
269                 return offset + run.length;
270                 
271         }
272
273         /**
274          * Consumes a run of characters at the limits of which we introduce a break.
275          * @param offset the offset to start at
276          * @return the run that was consumed
277          */
278         private Run consumeRun(int offset) {
279                 // assert offset < length
280                 
281                 char ch= fText.charAt(offset);
282                 int length= fText.length();
283                 Run run= getRun(ch);
284                 while (run.consume(ch) && offset < length - 1) {
285                         offset++;
286                         ch= fText.charAt(offset);
287                 }
288                 
289                 return run;
290         }
291
292         /**
293          * Retunrs a run based on a character.
294          * 
295          * @param ch the character to test
296          * @return the correct character given <code>ch</code>
297          */
298         private Run getRun(char ch) {
299                 Run run;
300                 if (WHITESPACE.isValid(ch))
301                         run= WHITESPACE;
302                 else if (DELIMITER.isValid(ch))
303                         run= DELIMITER;
304                 else if (CAMELCASE.isValid(ch))
305                         run= CAMELCASE;
306                 else if (OTHER.isValid(ch))
307                         run= OTHER;
308                 else {
309                         Assert.isTrue(false);
310                         return null;
311                 }
312                 
313                 run.init();
314                 return run;
315         }
316         
317         /*
318          * @see java.text.BreakIterator#getText()
319          */
320         public CharacterIterator getText() {
321                 return fIterator.getText();
322         }
323
324         /*
325          * @see java.text.BreakIterator#isBoundary(int)
326          */
327         public boolean isBoundary(int offset) {
328         if (offset == getText().getBeginIndex())
329             return true;
330         else
331             return following(offset - 1) == offset;
332         }
333
334         /*
335          * @see java.text.BreakIterator#last()
336          */
337         public int last() {
338                 fIndex= fIterator.last();
339                 return fIndex;
340         }
341
342         /*
343          * @see java.text.BreakIterator#next()
344          */
345         public int next() {
346                 fIndex= following(fIndex);
347                 return fIndex;
348         }
349
350         /*
351          * @see java.text.BreakIterator#next(int)
352          */
353         public int next(int n) {
354                 return fIterator.next(n);
355         }
356         
357         /*
358          * @see java.text.BreakIterator#preceding(int)
359          */
360         public int preceding(int offset) {
361                 if (offset == getText().getBeginIndex())
362                         return DONE;
363                 
364                 if (isBoundary(offset - 1))
365                         return offset - 1;
366
367                 int previous= offset - 1;
368                 do {
369                         previous= fIterator.preceding(previous);
370                 } while (!isBoundary(previous));
371                 
372                 int last= DONE;
373                 while (previous < offset) {
374                         last= previous;
375                         previous= following(previous);
376                 }
377                 
378                 return last;
379         }
380
381         /*
382          * @see java.text.BreakIterator#previous()
383          */
384         public int previous() {
385                 fIndex= preceding(fIndex);
386                 return fIndex;
387         }
388
389         /*
390          * @see java.text.BreakIterator#setText(java.lang.String)
391          */
392         public void setText(String newText) {
393                 setText((CharSequence) newText);
394         }
395
396         /**
397          * Creates a break iterator given a char sequence.
398          * @param newText the new text
399          */
400         public void setText(CharSequence newText) {
401                 fText= newText;
402                 fIterator.setText(new SequenceCharacterIterator(newText));
403                 first();
404         }
405
406         /*
407          * @see java.text.BreakIterator#setText(java.text.CharacterIterator)
408          */
409         public void setText(CharacterIterator newText) {
410                 if (newText instanceof CharSequence) {
411                         fText= (CharSequence) newText;
412                         fIterator.setText(newText);
413                         first();
414                 } else {
415                         throw new UnsupportedOperationException("CharacterIterator not supported"); //$NON-NLS-1$
416                 }
417         }
418 }