Tests for indexing a PHP AST with Lucene search engine;
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpeclipse / wizards / html / StringDivider.java
1 /*
2  * $Id: StringDivider.java,v 1.1 2004-10-05 20:51:57 jsurfer Exp $
3  * Copyright Narushima Hironori. All rights reserved.
4  */
5 package net.sourceforge.phpeclipse.wizards.html;
6
7 import java.util.Arrays;
8 import java.util.regex.Pattern;
9
/**
 * Splits multi-line text into a rectangular table of string cells.
 *
 * <p>The text is first cut into lines, then every line is cut into cells with
 * a delimiter regular expression. The delimiter is either supplied by the
 * caller or picked from {@link #splitRegexpCandidates} by
 * {@link #getDivideSuitedRegexp(String)}.
 */
public class StringDivider {

        /**
         * Pattern whose single capturing group is the word following an opening
         * {@code <} (optionally preceded by whitespace or slashes). It is not
         * referenced inside this class; the declaration is kept unchanged.
         */
        static Pattern tagNameChoosePattern = Pattern.compile("<[\\s/]*(\\w+)\\s*.*>");

        /**
         * Line separator: a line feed optionally preceded by a carriage return,
         * so that CRLF text does not leave a stray {@code '\r'} in the last cell.
         */
        private static final Pattern LINE_BREAK = Pattern.compile("\\r?\\n");

        /** Delimiter regular expressions tried, in order, by {@link #getDivideSuitedRegexp(String)}. */
        String[] splitRegexpCandidates = {
                "\t",
                ",",
                "\\s",
                "\\s+",
        };

        public StringDivider() {
        }

        /**
         * Divides {@code content} into a table using the delimiter chosen by
         * {@link #getDivideSuitedRegexp(String)}.
         *
         * @param content text to divide
         * @return one row per line, all rows padded to the same length
         * @throws NullPointerException if {@code content} is {@code null}
         */
        public String[][] divide(String content){
                return divide(content, getDivideSuitedRegexp(content));
        }

        /**
         * Divides {@code content} into a table: one row per line, one cell per
         * piece obtained by splitting the line with {@code regexp}. Rows shorter
         * than the widest row are padded on the right with empty strings, so the
         * result is rectangular.
         *
         * <p>Lines are separated by LF or CRLF. As with {@link String#split(String)},
         * trailing empty lines and trailing empty cells are dropped before padding.
         *
         * @param content text to divide
         * @param regexp  regular expression separating the cells of a line
         * @return the table; a zero-length array when {@code content} has no lines
         * @throws NullPointerException if {@code content} is {@code null}
         */
        public String[][] divide(String content, String regexp){
                String[] lines = LINE_BREAK.split(content);
                int len = lines.length;
                String[][] dist = new String[len][];
                if (len == 0) {
                        // Nothing to split; the delimiter is never looked at in this case.
                        return dist;
                }

                // Compile once instead of once per line.
                Pattern delimiter = Pattern.compile(regexp);

                int max = 0;
                for (int i = 0; i < len; i++) {
                        String[] cells = delimiter.split(lines[i]);
                        dist[i] = cells;
                        if (max < cells.length) {
                                max = cells.length;
                        }
                }
                // Pad every row to the widest row with empty strings.
                for (int i = 0; i < len; i++) {
                        String[] padded = new String[max];
                        Arrays.fill(padded, "");
                        System.arraycopy(dist[i], 0, padded, 0, dist[i].length);
                        dist[i] = padded;
                }
                return dist;
        }

        /**
         * Chooses the delimiter from {@link #splitRegexpCandidates} that divides
         * the lines of {@code content} most evenly.
         *
         * <p>Each candidate is scored by the difference between the largest and
         * the smallest number of cells it produces over all lines; the lowest
         * score wins. On equal scores the candidate producing more cells wins,
         * and if that is equal too, the earlier candidate is kept.
         *
         * @param content text to examine; lines are separated by LF or CRLF
         * @return the chosen regular expression, or {@code null} when there are
         *         no candidates
         * @throws NullPointerException if {@code content} is {@code null}
         */
        public String getDivideSuitedRegexp(String content){
                String[] lines = LINE_BREAK.split(content);

                String resultRegexp = null;
                int score = Integer.MAX_VALUE, cellCount = Integer.MIN_VALUE;

                for (int i = 0; i < splitRegexpCandidates.length; i++) {
                        String regexp = splitRegexpCandidates[i];
                        Pattern delimiter = Pattern.compile(regexp);
                        int max = Integer.MIN_VALUE, min = Integer.MAX_VALUE;
                        for (int j = 0; j < lines.length; j++) {
                                int count = delimiter.split(lines[j]).length;
                                if (max < count) {
                                        max = count;
                                }
                                if (min > count) {
                                        min = count;
                                }
                        }
                        // With no lines max/min still hold their sentinels and
                        // max - min would overflow; score that case as perfectly even.
                        int s = lines.length == 0 ? 0 : max - min;
                        if (score > s || (score == s && max > cellCount)) {
                                cellCount = max;
                                score = s;
                                resultRegexp = regexp;
                        }
                }
                return resultRegexp;
        }

}