1 package net.sourceforge.phpeclipse.wiki.actions.category;
3 import java.util.ArrayList;
5 public class ParseCategory {
// Creates a parser whose titleList starts out as a new, empty ArrayList.
8 public ParseCategory() {
9 titleList = new ArrayList();
/**
 * Scans page text and appends extracted substrings to titleList.
 * The visible code locates the first "contentSub" and the first "printfooter"
 * occurrence, looks for the character sequence "title" and stores a substring
 * of text for each hit.
 * NOTE(review): this listing omits several lines of the method (the embedded
 * line numbers jump, e.g. 13 to 15 and 17 to 24), so the loop bounds, the
 * assignment of titleStart and the catch body cannot be confirmed from here.
 *
 * @param text page source to scan; it is dereferenced without a null check
 */
11 public void parseCategory(String text) {
// Position of the start marker; indexOf yields -1 when the marker is absent.
12 int index1 = text.indexOf("contentSub");
// Position of the end marker.
13 int index2 = text.indexOf("printfooter");
// Falls back to the end of the text. The guarding condition sits on an omitted
// line - presumably it checks that "printfooter" was not found (TODO confirm).
15 index2 = text.length();
// Continues only when the start marker was found at an index greater than 0
// (a marker at position 0 is rejected as well) and the end bound lies behind it.
17 if (index1 > 0 && index2 > index1) {
24 ch = text.charAt(i++);
// i has already been advanced past ch by the post-increment, so together with
// the following charAt calls this matches the literal letters t-i-t-l-e.
28 if (ch == 't' && text.charAt(i) == 'i' && text.charAt(i +1) == 't' && text.charAt(i + 2) == 'l'
29 && text.charAt(i + 3) == 'e') {
35 ch = text.charAt(i++);
// substring excludes its end index; i - 1 is presumably the index of the
// character read last via charAt(i++), so that character is not stored
// (verify against the omitted lines between the read and this call).
38 titleList.add(text.substring(titleStart,i-1));
// NOTE(review): the matching try and the handler body are not visible here;
// presumably a charAt beyond the end of text terminates the scan via this catch.
47 } catch (IndexOutOfBoundsException e) {
/**
 * Manual smoke test: builds an embedded sample HTML page (a category page titled
 * "Kategorie:Eclipse Tips"), runs parseCategory over it and prints every entry of
 * the resulting title list to standard output.
 * NOTE(review): some lines of the sample string are missing from this listing
 * (the embedded line numbers jump), so the literal below is not the complete page.
 *
 * @param args command-line arguments; not referenced in the visible lines
 */
54 public static void main(String[] args) {
// Sample input: the literal contains both markers parseCategory searches for
// ("contentSub" and "printfooter") with the article links in between.
55 String test = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\r\n"
56 + "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"de\" lang=\"de\" dir=\"ltr\">\r\n"
58 + " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>\r\n"
59 + " <meta name=\"KEYWORDS\" content=\"Kategorie:Eclipse Tips\" />\r\n"
60 + "<meta name=\"robots\" content=\"index,follow\" />\r\n"
61 + "<link rel=\"shortcut icon\" href=\"/favicon.ico\" />\r\n"
62 + "<link title=\"Creative Commons\" type=\"application/rdf+xml\" href=\"/index.php?title=Kategorie:Eclipse_Tips&action=creativecommons\" rel=\"meta\" />\r\n"
63 + "<link rel=\"copyright\" href=\"http://www.gnu.org/copyleft/fdl.html\" />\r\n"
65 + " <title>Kategorie:Eclipse Tips - Plog4u</title>\r\n"
66 + " <style type=\"text/css\" media=\"screen,projection\">/*<![CDATA[*/ @import \"/stylesheets/monobook/main.css\"; /*]]>*/</style>\r\n"
68 + " <link rel=\"stylesheet\" type=\"text/css\" media=\"print\" href=\"/stylesheets/commonPrint.css\"/>\r\n"
69 + " <!--[if IE]><style type=\"text/css\" media=\"all\">@import \"/stylesheets/monobook/IEFixes.css\";</style>\r\n"
70 + " <script type=\"text/javascript\" src=\"/stylesheets/IEFixes.js\"></script>\r\n"
71 + " <meta http-equiv=\"imagetoolbar\" content=\"no\" /><![endif]-->\r\n"
72 + " <script src=\"/index.php?title=-&action=raw&gen=js\" type=\"text/javascript\"></script>\r\n"
73 + " <script type=\"text/javascript\" src=\"/stylesheets/wikibits.js\"></script>\r\n"
74 + " <style type=\"text/css\">/*<![CDATA[*/ @import \"/index.php?title=-&action=raw&gen=css\";\r\n"
75 + " /*]]>*/</style>\r\n"
78 + " <body class=\"ns-14\">\r\n"
80 + " <div id=\"globalWrapper\">\r\n"
81 + " <div id=\"column-content\">\r\n"
82 + " <div id=\"content\">\r\n"
84 + " <a name=\"top\" id=\"contentTop\"></a>\r\n"
85 + " <h1 class=\"firstHeading\">Kategorie:Eclipse Tips</h1>\r\n"
86 + " <div id=\"bodyContent\">\r\n"
87 + " <h3 id=\"siteSub\">aus Plog4u, der freien Eclipse Wissensdatenbank</h3>\r\n"
88 + " <div id=\"contentSub\"></div>\r\n"
93 + " <!-- start content -->\r\n"
94 + " <p>(Dieser Artikel enthält momentan keinen Text)<br style=\"clear:both;\"/>\r\n"
96 + "<h2>Artikel in der Kategorie \"Eclipse Tips\"</h2>\r\n"
97 + "Dieser Kategorie gehören 6 Artikel an.<h3>B</h3>\r\n"
98 + "<ul><li><a href=\"/index.php/Benutzung:Eclipse:Tips:Eclipse_Sprachpaket\" title =\"Benutzung:Eclipse:Tips:Eclipse Sprachpaket\">Benutzung:Eclipse:Tips:Eclipse Sprachpaket</a></li><li><a href=\"/index.php/Benutzung:Eclipse:Tips:Eclipse_Starten\" title =\"Benutzung:Eclipse:Tips:Eclipse Starten\">Benutzung:Eclipse:Tips:Eclipse Starten</a></li><li><a href=\"/index.php/Benutzung:Eclipse:Tips:JDT:Pr%C3%A4fixe_f%C3%BCr_Klassenattribute\" title =\"Benutzung:Eclipse:Tips:JDT:Präfixe für Klassenattribute\">Benutzung:Eclipse:Tips:JDT:Präfixe für Klassenattribute</a></li><li><a href=\"/index.php/Benutzung:Eclipse:Tips:Plugins_aufrufen\" title =\"Benutzung:Eclipse:Tips:Plugins aufrufen\">Benutzung:Eclipse:Tips:Plugins aufrufen</a></li><li><a href=\"/index.php/Benutzung:Eclipse:Tips:Sourceforge_CVS_Zugriff\" title =\"Benutzung:Eclipse:Tips:Sourceforge CVS Zugriff\">Benutzung:Eclipse:Tips:Sourceforge CVS Zugriff</a></li><li><a href=\"/index.php/Benutzung:Eclipse:Tips:Stringvergleiche_in_JUnit\" title =\"Benutzung:Eclipse:Tips:Stringvergleiche in JUnit\">Benutzung:Eclipse:Tips:Stringvergleiche in JUnit</a></li></ul>\r\n"
100 + "<div class=\"printfooter\">\r\n"
101 + "Von \"<a href=\"http://www.plog4u.de/index.php/Kategorie:Eclipse_Tips\">http://www.plog4u.de/index.php/Kategorie:Eclipse_Tips</a>\"</div>\r\n"
104 + " <!-- end content -->\r\n"
105 + " <div class=\"visualClear\"></div>\r\n"
109 + " <div id=\"column-one\">\r\n"
111 + " <div id=\"p-cactions\" class=\"portlet\">\r\n"
112 + " <h5>Views</h5>\r\n"
114 + " <li id=\"ca-nstab-category\" class=\"selected\"><a href=\"/index.php/Kategorie:Eclipse_Tips\">Kategorie</a></li><li id=\"ca-talk\" class=\"new\"><a href=\"/index.php?title=Kategorie_Diskussion:Eclipse_Tips&action=edit\">Diskussion</a></li><li id=\"ca-edit\" class=\"\"><a href=\"/index.php?title=Kategorie:Eclipse_Tips&action=edit\">bearbeiten</a></li>\r\n"
117 + " <div class=\"portlet\" id=\"p-personal\">\r\n"
119 + " <h5>\'Persönliche Werkzeuge</h5>\r\n"
120 + " <div class=\"pBody\">\r\n"
122 + " <li id=\"pt-anonuserpage\"><a href=\"/index.php/Benutzer:217.252.6.161\" class=\"new\">217.252.6.161</a></li><li id=\"pt-anontalk\"><a href=\"/index.php/Benutzer_Diskussion:217.252.6.161\" class=\"new\">Diskussionsseite dieser IP</a></li><li id=\"pt-anonlogin\"><a href=\"/index.php?title=Spezial:Userlogin&returnto=Kategorie:Eclipse_Tips\">Anmelden</a></li>\r\n"
127 + "<!-- <div class=\"portlet\" id=\"p-logo\">\r\n"
128 + " <a style=\"background-image: url(/stylesheets/images/wiki.png);\" href=\"/index.php/Hauptseite\" i18n:attributes=\"title string:mainpage\"></a>\r\n"
130 + " <div class=\"portlet\" id=\"p-nav\">\r\n"
131 + " <h5>Navigation</h5>\r\n"
132 + " <div class=\"pBody\">\r\n"
134 + " <li id=\"n-mainpage\"><a href=\"/index.php/Hauptseite\">Hauptseite</a></li>\r\n"
135 + " <li id=\"n-portal\"><a href=\"/index.php/Plog4u:Portal\">Plog4u-Portal</a></li>\r\n"
136 + " <li id=\"n-currentevents\"><a href=\"/index.php/Aktuelle_Ereignisse\">Aktuelle Ereignisse</a></li>\r\n"
138 + " <li id=\"n-recentchanges\"><a href=\"/index.php/Spezial:Recentchanges\">Letzte Änderungen</a></li>\r\n"
139 + " <li id=\"n-randompage\"><a href=\"/index.php/Spezial:Randompage\">Zufälliger Artikel</a></li>\r\n"
140 + " <li id=\"n-help\"><a href=\"/index.php/Plog4u:Hilfe\">Hilfe</a></li>\r\n"
142 + "<li><a href=\"http://www.plog4u.de/impressum.html\">Impressum</a></li>\r\n"
147 + " <div id=\"p-search\" class=\"portlet\">\r\n"
148 + " <h5>Suche</h5>\r\n"
149 + " <div class=\"pBody\">\r\n"
150 + " <form name=\"searchform\" action=\"/index.php/Spezial:Search\" id=\"searchform\">\r\n"
151 + " <input accesskey=\"f\" id=\"searchInput\" name=\"search\" type=\"text\" />\r\n"
152 + " <input value=\"Los\" type=\"submit\" name=\"go\" class=\"searchButton\" /> <input value=\"Suche\" type=\"submit\" name=\"fulltext\" class=\"searchButton\" />\r\n"
157 + " <div class=\"portlet\" id=\"p-tb\">\r\n"
158 + " <h5>Werkzeuge</h5>\r\n"
159 + " <div class=\"pBody\">\r\n"
161 + " <li id=\"t-whatlinkshere\"><a href=\"/index.php?title=Spezial:Whatlinkshere&target=Kategorie%3AEclipse_Tips\">Was zeigt hierhin</a></li>\r\n"
162 + " <li id=\"t-recentchangeslinked\"><a href=\"/index.php?title=Spezial:Recentchangeslinked&target=Kategorie%3AEclipse_Tips\">Verlinkte Seiten</a></li>\r\n"
168 + " <li id=\"t-specialpages\"><a href=\"/index.php/Spezial:Specialpages\">Spezialseiten</a></li>\r\n"
173 + " <div class=\"portlet\" id=\"p-advertisement\">\r\n"
174 + " <h5>werbung</h5>\r\n"
175 + " <div class=\"pBody\">\r\n"
177 + "<script type=\"text/javascript\"><!--\r\n"
178 + "google_ad_client = \"pub-0182243963199149\";\r\n"
179 + "google_ad_width = 120;\r\n"
180 + "google_ad_height = 240;\r\n"
181 + "google_ad_format = \"120x240_as\";\r\n"
182 + "google_ad_channel =\"4781857343\";\r\n"
183 + "google_ad_language =\"de\";\r\n"
184 + "//--></script>\r\n"
185 + "<script type=\"text/javascript\" src=\"http://pagead2.googlesyndication.com/pagead/show_ads.js\">\r\n"
191 + " </div><!-- end of the left (by default at least) column -->\r\n"
192 + " <div class=\"visualClear\"></div>\r\n"
193 + " <div id=\"footer\">\r\n"
194 + " <div id=\"f-poweredbyico\"><a href=\"http://www.mediawiki.org/\"><img src=\"/stylesheets/images/poweredby_mediawiki_88x31.png\" alt=\"MediaWiki\" /></a></div>\r\n"
196 + " <div id=\"f-copyrightico\"><a href=\"http://www.gnu.org/copyleft/fdl.html\"><img src=\"/stylesheets/images/gnu-fdl.png\" alt=\'GNU Free Documentation License 1.2\' /></a></div>\r\n"
197 + " <ul id=\"f-list\">\r\n"
202 + " <li id=\"f-about\"><a href=\"/index.php/Plog4u:%C3%9Cber_Plog4u\" title =\"Plog4u:Über Plog4u\">Über Plog4u</a></li>\r\n"
203 + " <li id=\"f-disclaimer\"><a href=\"/index.php/Plog4u:Lizenzbestimmungen\" title =\"Plog4u:Lizenzbestimmungen\">Lizenzbestimmungen</a></li>\r\n"
207 + " <!-- Served by single in 0.09 secs. -->\r\n"
209 + " </body>\r\n" + "</html>\r\n" + "";
// Parse the sample with a fresh parser instance.
210 ParseCategory pc = new ParseCategory();
211 pc.parseCategory(test);
// Print each collected entry on its own line.
212 ArrayList list = pc.getTitleList();
213 for (int i = 0; i < list.size(); i++) {
214 System.out.println(list.get(i));
218 * @return Returns the titleList.
220 public ArrayList getTitleList() {