Added
Link Here
|
1 |
/* |
2 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
3 |
* |
4 |
* Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
5 |
* |
6 |
* The contents of this file are subject to the terms of either the GNU |
7 |
* General Public License Version 2 only ("GPL") or the Common |
8 |
* Development and Distribution License("CDDL") (collectively, the |
9 |
* "License"). You may not use this file except in compliance with the |
10 |
* License. You can obtain a copy of the License at |
11 |
* http://www.netbeans.org/cddl-gplv2.html |
12 |
* or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the |
13 |
* specific language governing permissions and limitations under the |
14 |
* License. When distributing the software, include this License Header |
15 |
* Notice in each file and include the License file at |
16 |
* nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this |
17 |
* particular file as subject to the "Classpath" exception as provided |
18 |
* by Sun in the GPL Version 2 section of the License file that |
19 |
* accompanied this code. If applicable, add the following below the |
20 |
* License Header, with the fields enclosed by brackets [] replaced by |
21 |
* your own identifying information: |
22 |
* "Portions Copyrighted [year] [name of copyright owner]" |
23 |
* |
24 |
* If you wish your version of this file to be governed by only the CDDL |
25 |
* or only the GPL Version 2, indicate your decision by adding |
26 |
* "[Contributor] elects to include this software in this distribution |
27 |
* under the [CDDL or GPL Version 2] license." If you do not indicate a |
28 |
* single choice of license, a recipient has the option to distribute |
29 |
* your version of this file under either the CDDL, the GPL Version 2 or |
30 |
* to extend the choice of license to its licensees as provided above. |
31 |
* However, if you add GPL Version 2 code and therefore, elected the GPL |
32 |
* Version 2 license, then the option applies only if the new code is |
33 |
* made subject to such option by the copyright holder. |
34 |
* |
35 |
* Contributor(s): |
36 |
* |
37 |
* Portions Copyrighted 2009 Sun Microsystems, Inc. |
38 |
*/ |
39 |
|
40 |
package org.netbeans.nbbuild; |
41 |
|
42 |
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tools.ant.Project;
52 |
|
53 |
/** |
54 |
* Checker for the <object> elements. |
55 |
* The <object> elements may be used by the HelpSet authors in the HTML |
56 |
* sources. |
57 |
* By default the checker is aware about the following classes of the |
58 |
* <object> elements: |
59 |
* <ul> |
60 |
* <li><code>java:org.netbeans.modules.javahelp.BrowserDisplayer</code> |
61 |
* Testable URI value is contained in a <param> element with the |
62 |
* name <code>content</code>. |
63 |
* </li> |
64 |
* <li><code>java:com.sun.java.help.impl.JHSecondaryViewer</code> |
65 |
* Testable URI value is contained in a <param> element with the |
66 |
* name <code>content</code>. |
67 |
* </li> |
68 |
* </ul> |
69 |
* |
70 |
* @version 1.0 |
71 |
* |
72 |
* @see <a href="http://www.netbeans.org/issues/show_bug.cgi?id=117506"> |
73 |
* Issue #117506</a> |
74 |
* @see <a href="http://wiki.netbeans.org/JavaHelp_CheckHelpSets"> |
75 |
* JavaHelp - Help Set Checkers</a> |
76 |
* |
77 |
* @author Victor G. Vasilyev <vvg@netbeans.org> |
78 |
* |
79 |
* TODO: |
80 |
* - TBD It will be better to move all the help set checkers to the separate |
81 |
* package org.netbeans.nbbuild.helpsets |
82 |
* - Getting a name list of the <object> properties as an input to provide |
83 |
* checking of their values for correctness of URLs. |
84 |
* The list should be passed via Ant property. |
85 |
* - Getting a <object> classid list as an input to provide |
86 |
* exclusion of this object from checking. Take the list into account in the |
87 |
* method check(String content). The list should be passed via Ant property. |
88 |
*/ |
89 |
public class HTMLObjectElementsChecker { |
90 |
|
91 |
/**
 * Separator placed between a {@code classid} and the name of a
 * {@code <param>} element when a parameter identifier is built.
 *
 * @see #getParamID(java.lang.String, java.lang.String)
 */
public static final String PARAM_NAME_SEPARATOR = ".";

/** Text put in front of every error message recorded by this checker. */
private static final String ERROR_PREFIX = "HTML <object> element error: ";

/**
 * {@link MessageFormat} pattern for the message
 * "The element without classid attribute."
 * <ul>
 * <li>{0} - URI of the HTML file.</li>
 * <li>{1} - position ({@code line:column}) of the offending
 *           {@code <object>} element in the HTML file.</li>
 * <li>{2} - text of the offending {@code <object>} element.</li>
 * </ul>
 */
private static final String ERROR1 =
        "The element without classid attribute. \n" +
        "file:\n" +
        "{0}\n" +
        "offset: {1}\n" +
        "text: \n" +
        "{2}";

/**
 * {@link MessageFormat} pattern for the message
 * "The element with unknown classid:"
 * <ul>
 * <li>{0} - value of the {@code classid} attribute.</li>
 * <li>{1} - URI of the HTML file.</li>
 * <li>{2} - position ({@code line:column}) of the offending
 *           {@code <object>} element in the HTML file.</li>
 * <li>{3} - text of the offending {@code <object>} element.</li>
 * </ul>
 */
private static final String ERROR2 =
        "The element with unknown classid: \n" +
        "{0}\n" +
        "file:\n" +
        "{1}\n" +
        "offset: {2}\n" +
        "text: \n" +
        "{3}";

/**
 * {@link MessageFormat} pattern for the message
 * "The &lt;param&gt; element with unknown name."
 * It is reported when a {@code <param>} element has no {@code name}
 * attribute.
 * <ul>
 * <li>{0} - text of the {@code <param>} element.</li>
 * <li>{1} - URI of the HTML file.</li>
 * <li>{2} - position ({@code line:column}) of the enclosing
 *           {@code <object>} element in the HTML file.</li>
 * <li>{3} - text of the enclosing {@code <object>} element.</li>
 * </ul>
 */
private static final String ERROR3 =
        "The <param> element with unknown name. \n" +
        "{0}\n" +
        "file:\n" +
        "{1}\n" +
        "offset: {2}\n" +
        "text: \n" +
        "{3}";

/**
 * {@link MessageFormat} pattern for the message
 * "The &lt;param&gt; element with unknown value."
 * It is reported when a testable {@code <param>} element has no
 * {@code value} attribute.
 * <ul>
 * <li>{0} - text of the {@code <param>} element.</li>
 * <li>{1} - URI of the HTML file.</li>
 * <li>{2} - position ({@code line:column}) of the enclosing
 *           {@code <object>} element in the HTML file.</li>
 * <li>{3} - text of the enclosing {@code <object>} element.</li>
 * </ul>
 */
private static final String ERROR4 =
        "The <param> element with unknown value. \n" +
        "{0}\n" +
        "file:\n" +
        "{1}\n" +
        "offset: {2}\n" +
        "text: \n" +
        "{3}";

/**
 * {@link MessageFormat} pattern for the message
 * "The &lt;param&gt; element has incorrect URI value."
 * It is reported when the {@code value} attribute of a testable
 * {@code <param>} element cannot be parsed as a URI.
 * <ul>
 * <li>{0} - text of the {@code <param>} element.</li>
 * <li>{1} - URI of the HTML file.</li>
 * <li>{2} - position ({@code line:column}) of the enclosing
 *           {@code <object>} element in the HTML file.</li>
 * <li>{3} - text of the enclosing {@code <object>} element.</li>
 * <li>{4} - text of the {@code URISyntaxException}.</li>
 * </ul>
 */
private static final String ERROR5 =
        "The <param> element has incorrect URI value. \n" +
        "exception: {4}\n" +
        "{0}\n" +
        "file:\n" +
        "{1}\n" +
        "offset: {2}\n" +
        "text: \n" +
        "{3}\n";

/** State of the {@code CheckLinks} process this checker reports to. */
private final CheckLinks.State state;

/**
 * Values of the {@code classid} attribute that are accepted by
 * {@link #check(String)}; any other value is reported as an error.
 */
private final List<String> knownClassIDs = new ArrayList<String>();

/**
 * Identifiers ({@code classid + PARAM_NAME_SEPARATOR + name}) of the
 * {@code <param>} elements whose value is checked as a URI.
 */
private final List<String> knownURIParams = new ArrayList<String>();
{
    // Parameters that are testable by default.
    addTestableURIParam(
            "java:org.netbeans.modules.javahelp.BrowserDisplayer.content");
    addTestableURIParam(
            "java:com.sun.java.help.impl.JHSecondaryViewer.content");
}
218 |
|
219 |
/**
 * Constructs a <code>HTMLObjectElementsChecker</code> associated with the
 * specified <code>CheckLinks</code> process.
 *
 * @param state - The state of the <code>CheckLinks</code> process. It
 *        supplies the URI under test, the Ant task used for logging and
 *        the list that collects the error messages.
 */
public HTMLObjectElementsChecker(CheckLinks.State state) {
    this.state = state;
}
227 |
|
228 |
/** |
229 |
* Excludes class of the <object> elements from testing by registering |
230 |
* the specifyed <code>classid</code> as a known classid for this checker. |
231 |
* @param classid - a value of the <code>classid</code> attribute |
232 |
* used in the <object> elements of that class. |
233 |
*/ |
234 |
public void excludeObjectClass(String classid) { |
235 |
knownClassIDs.add(classid); |
236 |
} |
237 |
|
238 |
/** |
239 |
* Registers the specified <code>paramid</code> as a URI Param that will be |
240 |
* tested. |
241 |
* <br/> |
242 |
* <pre><code> |
243 |
* paramid := classid PARAM_NAME_SEPARATOR paramName |
244 |
* paramName is a value of the name attribute of the param element nested to |
245 |
* the object element with the classid attribute whose value is used in the |
246 |
* paramid :-) |
247 |
* </code></pre> |
248 |
* @param paramid - The id of the PARAM element associated with the classid |
249 |
* of the outer OBJECT element. |
250 |
*/ |
251 |
public void addTestableURIParam(String paramid) { |
252 |
knownURIParams.add(paramid); |
253 |
int dotPos = paramid.lastIndexOf(PARAM_NAME_SEPARATOR); |
254 |
String classid = paramid.substring(0, dotPos); |
255 |
knownClassIDs.add(classid); |
256 |
} |
257 |
|
258 |
/** |
259 |
* Checks the specified HTML <code>content</code>. |
260 |
* The HTML <code>content</code> may contain the <object> elements |
261 |
* that will be checked. |
262 |
* |
263 |
* @param content - The HTML content under test. |
264 |
*/ |
265 |
public void check(String content) { |
266 |
URI uri = state.u; |
267 |
antLog("Check HTML <object> elements in " + uri, Project.MSG_VERBOSE); |
268 |
HTML html = new HTML(content); |
269 |
// process OBJECT elements: |
270 |
Iterable<HTML.Object> oi = html.getObjects(); |
271 |
for(HTML.Object e : oi) { |
272 |
HTML.Attribute a = e.getAttribute("classid"); |
273 |
if(a == null) { |
274 |
error(ERROR1, uri, e.getLineColumn(), e.getText()); |
275 |
continue; |
276 |
} |
277 |
String classid=a.getValue(); |
278 |
if(!knownClassIDs.contains(classid)) { |
279 |
error(ERROR2, a.getValue(), uri, e.getLineColumn(), |
280 |
e.getText()); |
281 |
continue; |
282 |
} |
283 |
// TODO: May be check the known <object> against its XML Schema, |
284 |
// but when the HTML text should be normalized before. |
285 |
|
286 |
// process nested PARAM elements |
287 |
Iterable<HTML.Object.Param> pi = e.getParams(); |
288 |
for(HTML.Object.Param p : pi) { |
289 |
// process attributes of the PARAM element:; |
290 |
HTML.Attribute paName = p.getAttribute("name"); |
291 |
if(paName == null) { |
292 |
error(ERROR3, p.getText(), uri, e.getLineColumn(), |
293 |
e.getText()); |
294 |
continue; |
295 |
} |
296 |
String paNameValue = paName.getValue(); |
297 |
String paramid = getParamID(classid, paNameValue); |
298 |
if(!knownURIParams.contains(paramid)) { |
299 |
continue; |
300 |
} |
301 |
// check URI defined in the param element. |
302 |
antLog("Check HTML <param> element with URI type: " + paramid, |
303 |
Project.MSG_VERBOSE); |
304 |
|
305 |
HTML.Attribute paValue = p.getAttribute("value"); |
306 |
if(paValue == null) { |
307 |
error(ERROR4, p.getText(), uri, e.getLineColumn(), |
308 |
e.getText()); |
309 |
continue; |
310 |
} |
311 |
String testableURI = paValue.getValue(); |
312 |
CheckLinks.State s = state.clone(); |
313 |
s.recurse = 0; |
314 |
s.referrer = testableURI; |
315 |
try { |
316 |
s.u = new URI(testableURI); |
317 |
} catch (URISyntaxException ex) { |
318 |
error(ERROR5, p.getText(), uri, e.getLineColumn(), |
319 |
e.getText(), ex.toString()); |
320 |
} |
321 |
try { |
322 |
CheckLinks.scan(s); |
323 |
} catch (IOException ex) { |
324 |
ex.printStackTrace(); |
325 |
} |
326 |
} // for(HTML.Object.Param p : pi) |
327 |
} // for(HTML.Object e : oi) |
328 |
|
329 |
} |
330 |
|
331 |
private String getParamID(String classid, String paramName) { |
332 |
return classid + PARAM_NAME_SEPARATOR + paramName; |
333 |
} |
334 |
|
335 |
/** |
336 |
* Shows Ant log message. |
337 |
* @param s - The message string. |
338 |
* @param level - The level of the message, e.g Project.MSG_VERBOSE |
339 |
* |
340 |
* @see org.apache.tools.ant.Project |
341 |
*/ |
342 |
private void antLog(String s, int level) { |
343 |
state.task.log(s, level); |
344 |
} |
345 |
|
346 |
/** |
347 |
* Adds the specified error message to the error list associated with the |
348 |
* <code>CheckLinks.State</code>. An associated Ant task will show these |
349 |
* messages at the completion. The <code>ERROR_PREFIX</code> will be added |
350 |
* before each message. |
351 |
* |
352 |
* @param s - The error message. |
353 |
*/ |
354 |
private void error(String s) { |
355 |
state.errors.add(ERROR_PREFIX + s); |
356 |
} |
357 |
|
358 |
/** |
359 |
* Adds the specified error message to the error list associated with the |
360 |
* <code>CheckLinks.State</code>. An associated Ant task will show these |
361 |
* messages at the completion. |
362 |
* <p>The error message is specified by the given <code>pattern</code> and |
363 |
* uses it to format the given <code>arguments</code>.</p> |
364 |
* <p>The <code>ERROR_PREFIX</code> will be added before each message.</p> |
365 |
* |
366 |
* @param pattern - The pattern of the message. |
367 |
* @param arguments - The arguments of the message. |
368 |
*/ |
369 |
private void error(String pattern, Object... arguments) { |
370 |
error(MessageFormat.format(pattern, arguments)); |
371 |
} |
372 |
|
373 |
|
374 |
/** |
375 |
* HTML parser that helps to find <object> elements, nested |
376 |
* <param> elements and their attributes in the specified HTML |
377 |
* source. |
378 |
* <br/> |
379 |
* Usage: |
380 |
* <pre><code> |
381 |
* HTML html = new HTML(htmlText); |
382 |
* // process OBJECT elements: |
383 |
* Iterable<HTML.Object> oi = html.getObjects(); |
384 |
* for(HTML.Object e : oi) { |
385 |
* int start=e.getStart(); // start position of the element |
386 |
* int end=e.getEnd(); // end position of the element |
387 |
* String text = e.getText(); // text of the element |
388 |
* // ... |
389 |
* // process attributes of the OBJECT element:; |
390 |
* Iterable<HTML.Attribute> ai = e.getAttributes(); |
391 |
* for(HTML.Attribute a : ai) { |
392 |
* String name=a.getName(); // name of the attribute |
393 |
* String value=a.getValue(); // value of the attribute |
394 |
* // ... |
395 |
* } |
396 |
* // process nested PARAM elements |
397 |
* Iterable<HTML.Object.Param> pi = e.getParams(); |
398 |
* for(HTML.Object.Param p : pi) { |
399 |
* String ptext = p.getText(); // text of the element |
400 |
* // process attributes of the PARAM element:; |
401 |
* Iterable<HTML.Attribute> pai = p.getAttributes(); |
402 |
* for(HTML.Attribute pa : pai) { |
403 |
* String name=pa.getName(); // name of the attribute |
404 |
* String value=pa.getValue(); // value of the attribute |
405 |
* // ... |
406 |
* } |
407 |
* } |
408 |
* } |
409 |
* </code></pre> |
410 |
*/ |
411 |
public static class HTML { |
412 |
|
413 |
private static Pattern lineBreak = |
414 |
Pattern.compile("^", Pattern.MULTILINE); |
415 |
|
416 |
/** |
417 |
* SGML source where <!-- ... --> tokens are valid comment delimiters. |
418 |
*/ |
419 |
protected String source; |
420 |
|
421 |
/** |
422 |
* |
423 |
* @param source |
424 |
*/ |
425 |
public HTML(String source) { |
426 |
this.source = source; |
427 |
} |
428 |
|
429 |
/** |
430 |
* Returns an instance of <code>Iterable</code> object for the |
431 |
* <object> elements contained in the HTML source. |
432 |
* @return an <code>Iterable</code> object. |
433 |
*/ |
434 |
public Iterable<HTML.Object> getObjects() { |
435 |
@SuppressWarnings("unchecked") // the cast is correct |
436 |
final Iterator<HTML.Object> ei = new HTML.Object(this); |
437 |
return new Iterable<HTML.Object>() { |
438 |
public Iterator<HTML.Object> iterator() { |
439 |
return ei; |
440 |
} |
441 |
}; |
442 |
} |
443 |
|
444 |
|
445 |
/** |
446 |
* Abstract HTML construction. |
447 |
*/ |
448 |
public abstract class Construction implements Iterator { |
449 |
|
450 |
/** |
451 |
* Match flags, a bit mask that may include |
452 |
* Pattern.CASE_INSENSITIVE, Pattern.MULTILINE, Pattern.DOTALL, |
453 |
* Pattern.UNICODE_CASE, and Pattern.CANON_EQ |
454 |
*/ |
455 |
private int matchFlags; |
456 |
|
457 |
private String regex; |
458 |
|
459 |
private Matcher m; |
460 |
|
461 |
/** |
462 |
* Creates instance of the HTML |
463 |
* @param regex |
464 |
* @param matchFlags |
465 |
*/ |
466 |
protected Construction(String regex, int matchFlags) { |
467 |
this.regex = regex; |
468 |
this.matchFlags = matchFlags; |
469 |
Pattern p = getPattern(); |
470 |
m = p.matcher(source); |
471 |
} |
472 |
|
473 |
/** |
474 |
* Returns <code>Matcher</code> associated with the Construction. |
475 |
* @return <code>Matcher</code> for the Construction. |
476 |
*/ |
477 |
protected Matcher getMatcher() { |
478 |
return m; |
479 |
} |
480 |
|
481 |
/** |
482 |
* Returns <code>Pattern</code> associated with the Construction. |
483 |
* @return <code>Pattern</code> for the Construction. |
484 |
*/ |
485 |
public Pattern getPattern() { |
486 |
return Pattern.compile(regex, matchFlags); |
487 |
} |
488 |
|
489 |
/** |
490 |
* Returns text of the <code>Construction</code>. |
491 |
* @return text of the <code>Construction</code>. |
492 |
*/ |
493 |
public String getText() { |
494 |
return getMatcher().group(); |
495 |
} |
496 |
|
497 |
/** |
498 |
* Checks whether this <code>HTML.Element</code> is not commented |
499 |
* out. |
500 |
* @return false if comment start is before element, but end is not. |
501 |
*/ |
502 |
protected boolean isCommentedOut() { |
503 |
int pos = getMatcher().start(); |
504 |
int commentStart = source.lastIndexOf (Comment.START, pos); |
505 |
int commentEnd = source.lastIndexOf (Comment.END, pos); |
506 |
if(commentStart == -1) { // i.e. ... elem ... |
507 |
return false; |
508 |
} |
509 |
if(commentEnd == -1) { |
510 |
return true; // i.e. <!-- ... elem ... |
511 |
} |
512 |
if(commentEnd < commentStart) { |
513 |
return true; // i.e. ... --> ... <!-- ... elem ... |
514 |
} |
515 |
return false; // i.e. ... <!-- ... --> ... elem ... |
516 |
} |
517 |
|
518 |
/** |
519 |
* Returns the start index of the <code>Construction</code>. |
520 |
* |
521 |
* @return The index of the first character of the |
522 |
* <code>Construction</code>. |
523 |
* @throws <code>IllegalStateException</code> - If no match has yet |
524 |
* been attempted, or if the previous match operation failed. |
525 |
*/ |
526 |
public int getStart() { |
527 |
return getMatcher().start(); |
528 |
} |
529 |
|
530 |
/** |
531 |
* Returns the offset after the last character of the |
532 |
* <code>Construction</code>. |
533 |
* |
534 |
* @return The offset after the last character of the |
535 |
* <code>Construction</code>. |
536 |
* @throws <code>IllegalStateException</code> - If no match has yet |
537 |
* been attempted, or if the previous match operation failed. |
538 |
*/ |
539 |
public int getEnd() { |
540 |
return getMatcher().end(); |
541 |
} |
542 |
|
543 |
/** |
544 |
* Return the start offset of the <code>Construction</code> as a |
545 |
* string in form: <code>Line:Column</code>. |
546 |
* @return a line:col string. |
547 |
*/ |
548 |
public String getLineColumn() { |
549 |
Matcher lbm = lineBreak.matcher(source); |
550 |
int line = 0; |
551 |
int col = 1; |
552 |
int pos = getStart(); |
553 |
while (lbm.find()) { |
554 |
if (lbm.start() <= pos) { |
555 |
line++; |
556 |
col = pos - lbm.start() + 1; |
557 |
} else { |
558 |
break; |
559 |
} |
560 |
} |
561 |
return line + ":" + col; |
562 |
} |
563 |
|
564 |
public boolean hasNext() { |
565 |
while(getMatcher().find()) { |
566 |
if(!isCommentedOut()) { |
567 |
return true; |
568 |
} |
569 |
} |
570 |
return false; |
571 |
} |
572 |
|
573 |
public Construction next() { |
574 |
return this; |
575 |
} |
576 |
|
577 |
public void remove() { |
578 |
throw new UnsupportedOperationException(); |
579 |
} |
580 |
|
581 |
} // Element |
582 |
|
583 |
/** |
584 |
* Commented block. |
585 |
*/ |
586 |
public class Comment { |
587 |
/** |
588 |
* Start token of the HTML commented block. |
589 |
*/ |
590 |
public static final String START = "<!--"; // NOI18N |
591 |
/** |
592 |
* End token of the HTML commented block. |
593 |
*/ |
594 |
public static final String END = "-->"; // NOI18N |
595 |
} |
596 |
|
597 |
/** |
598 |
* HTML <code>Construction</code> with attributes. |
599 |
*/ |
600 |
public class AttributableConstruction extends Construction { |
601 |
|
602 |
private int attListGroup; |
603 |
|
604 |
/** |
605 |
* Creates <code>AttributableConstruction</code>. |
606 |
* @param regex - The regular expression associated with the |
607 |
* <code>AttributableConstruction</code>. |
608 |
* @param matchFlags |
609 |
* @param attListGroup |
610 |
*/ |
611 |
public AttributableConstruction(String regex, |
612 |
int matchFlags, int attListGroup) { |
613 |
super(regex, matchFlags); |
614 |
this.attListGroup = attListGroup; |
615 |
} |
616 |
|
617 |
/** |
618 |
* Returns an <code>Iterable</code> object for the set of attributes |
619 |
* associated whit the HTML element. |
620 |
* |
621 |
* @return an <code>Iterable</code> object. |
622 |
*/ |
623 |
public Iterable<Attribute> getAttributes() { |
624 |
int startAttList = getMatcher().start(attListGroup); |
625 |
int endAttList = getMatcher().end(attListGroup); |
626 |
@SuppressWarnings("unchecked") // the cast is correct |
627 |
final Iterator<Attribute> i = new HTML.Attribute(startAttList, |
628 |
endAttList); |
629 |
return new Iterable<Attribute>() { |
630 |
public Iterator<Attribute> iterator() { |
631 |
return i; |
632 |
} |
633 |
}; |
634 |
} |
635 |
|
636 |
/** |
637 |
* Returns an object attribute with the specified <code>name</code>. |
638 |
* @param name - The name of the object attribute. |
639 |
* @return The object attribute if exists, otherwise |
640 |
* <code>null</code>. |
641 |
*/ |
642 |
public HTML.Attribute getAttribute(String name) { |
643 |
Iterable<HTML.Attribute> ai = getAttributes(); |
644 |
for(HTML.Attribute a : ai) { |
645 |
if(name.equalsIgnoreCase(a.getName())) { |
646 |
return a; |
647 |
} |
648 |
} |
649 |
return null; |
650 |
} |
651 |
} // AttributableConstruction |
652 |
|
653 |
/** |
654 |
* The HTML object element. |
655 |
*/ |
656 |
public class Object extends AttributableConstruction { |
657 |
|
658 |
/** |
659 |
* Regular expression for the attribute list group. |
660 |
* @see http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2 |
661 |
*/ |
662 |
private static final String ATTLIST = "(.*?)"; // NOI18N |
663 |
private static final String START = "<object" + ATTLIST+ ">"; // NOI18N |
664 |
private static final String BODY = "(.*?)"; // NOI18N |
665 |
private static final String END = "</object>"; // NOI18N |
666 |
private static final String REGEX = START + BODY + END; |
667 |
private static final int ATTLIST_GROUP = 1; |
668 |
private static final int PARAMS_GROUP = 2; |
669 |
private static final int MATCH_FLAGS = Pattern.CASE_INSENSITIVE| |
670 |
Pattern.DOTALL|Pattern.MULTILINE; |
671 |
|
672 |
/** |
673 |
* |
674 |
* @param html |
675 |
*/ |
676 |
public Object(HTML html) { |
677 |
super(REGEX, MATCH_FLAGS, ATTLIST_GROUP); |
678 |
} |
679 |
|
680 |
/** |
681 |
* |
682 |
* @return |
683 |
*/ |
684 |
public Iterable<Param> getParams() { |
685 |
int startParams = getMatcher().start(PARAMS_GROUP); |
686 |
int endParams = getMatcher().end(PARAMS_GROUP); |
687 |
@SuppressWarnings("unchecked") // the cast is correct |
688 |
final Iterator<Param> i = new Param(startParams, endParams); |
689 |
return new Iterable<Param>() { |
690 |
public Iterator<Param> iterator() { |
691 |
return i; |
692 |
} |
693 |
}; |
694 |
} |
695 |
|
696 |
/** |
697 |
* |
698 |
*/ |
699 |
public class Param extends AttributableConstruction { |
700 |
private static final String REGEX = "<param(.*?)>"; // NOI18N |
701 |
private static final int ATTLIST_GROUP = 1; |
702 |
private static final int MATCH_FLAGS = Pattern.CASE_INSENSITIVE| |
703 |
Pattern.DOTALL|Pattern.MULTILINE; |
704 |
|
705 |
private Matcher paramMatcher; |
706 |
|
707 |
/** |
708 |
* |
709 |
* @param start |
710 |
* @param end |
711 |
*/ |
712 |
public Param(int start, int end) { |
713 |
super(REGEX, MATCH_FLAGS, ATTLIST_GROUP); |
714 |
Matcher parentMatcher = super.getMatcher(); |
715 |
paramMatcher = parentMatcher.region(start, end); |
716 |
} |
717 |
|
718 |
/** |
719 |
* |
720 |
* @return |
721 |
*/ |
722 |
@Override |
723 |
public Matcher getMatcher() { |
724 |
return paramMatcher; |
725 |
} |
726 |
|
727 |
} // Param |
728 |
|
729 |
} // Object |
730 |
|
731 |
/** |
732 |
* Attribute of the element. |
733 |
* This implementation has the following limitations: |
734 |
* <ul> |
735 |
* <li> Only default case of the attribute syntax specified by the HTML |
736 |
* 4.01 Specification is supported:<br/> |
737 |
* "By default, SGML requires that all attribute values be |
738 |
* delimited using either double quotation marks (ASCII decimal |
739 |
* 34) or single quotation marks (ASCII decimal 39).<br/> |
740 |
* ...<br/> |
741 |
* <b>We recommend using quotation marks even when it is possible |
742 |
* to eliminate them.</b>"</li> |
743 |
* <li>DON'T SUPPORTED: "Single quote marks can be included within the |
744 |
* attribute value when the value is delimited by double quote |
745 |
* marks, and vice versa. Authors may also use numeric character |
746 |
* references to represent double quotes (") and single quotes |
747 |
* ('). For double quotes authors can also use the character |
748 |
* entity reference &quot;</li> |
749 |
* <li>DON'T SUPPORTED: In certain cases, authors may specify the value |
750 |
* of an attribute without any quotation marks. The attribute value |
751 |
* may only contain letters (a-z and A-Z), digits (0-9), hyphens |
752 |
* (ASCII decimal 45), periods (ASCII decimal 46), underscores |
753 |
* (ASCII decimal 95), and colons (ASCII decimal 58).</li> |
754 |
* </ul> |
755 |
* |
756 |
* @see <a href=http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2> |
757 |
* HTML 4.01 Specification - 3.2.2 Attributes</a> |
758 |
*/ |
759 |
public class Attribute extends Construction { |
760 |
private static final String NAME = "(\\w*?)"; |
761 |
private static final String SD = "(\"|')"; // Start delimiter |
762 |
private static final String VALUE = "(.*?)"; |
763 |
private static final String ED = "\\2"; // reffers to SD group |
764 |
private static final String REGEX = |
765 |
NAME + "=\\s*?" + SD + VALUE + ED; // NOI18N |
766 |
private static final int NAME_GROUP = 1; |
767 |
private static final int VALUE_GROUP = 3; |
768 |
private static final int MATCH_FLAGS = |
769 |
Pattern.CASE_INSENSITIVE|Pattern.DOTALL; // Not multiline! |
770 |
|
771 |
private Matcher attMatcher; |
772 |
|
773 |
/** |
774 |
* |
775 |
* @param start |
776 |
* @param end |
777 |
*/ |
778 |
public Attribute(int start, int end) { |
779 |
super(REGEX, MATCH_FLAGS); |
780 |
Matcher parentMatcher = super.getMatcher(); |
781 |
attMatcher = parentMatcher.region(start, end); |
782 |
} |
783 |
|
784 |
/** |
785 |
* |
786 |
* @return |
787 |
*/ |
788 |
@Override |
789 |
public Matcher getMatcher() { |
790 |
return attMatcher; |
791 |
} |
792 |
|
793 |
/** |
794 |
* |
795 |
* @return |
796 |
*/ |
797 |
public String getName() { |
798 |
return getMatcher().group(NAME_GROUP); |
799 |
} |
800 |
|
801 |
/** |
802 |
* |
803 |
* @return |
804 |
*/ |
805 |
public String getValue() { |
806 |
return getMatcher().group(VALUE_GROUP); |
807 |
} |
808 |
} // Attribute |
809 |
|
810 |
} // HTML |
811 |
|
812 |
/** |
813 |
* Development time test entry point. |
814 |
* @param args the command line arguments |
815 |
*/ |
816 |
public static void main(String[] args) { |
817 |
|
818 |
// String testHTML = "<html> \n " + |
819 |
// "\t<OBJECT CLASSID=\"java:com.sun.java.help.impl.JHSecondaryViewer\" type= 'aaa'> \n" + |
820 |
// "\t\t<param name=\"content\" value=\"1popup_gloss.html\"> \n" + |
821 |
// "\t\t<param name=\"viewerActivator\" value=\"javax.help.LinkLabel\">\n" + |
822 |
// "\t\t<param name=\"viewerStyle\" value=\"javax.help.Popup\">\n" + |
823 |
// "\t\t<param name=\"viewerSize\" value=\"400,250\">\n" + |
824 |
// "\t\t<param name=\"text\" value=\"popup windows\">\n" + |
825 |
// "\t\t<param name=\"textColor\" value=\"blue\">\n" + |
826 |
// "\t\t<param name=\"viewerName\" value=\"1glossary\">\n" + |
827 |
// "\t</OBJECT>\n" + |
828 |
// "\n <a />"+ |
829 |
// "\t<OBJECT CLASSID=\"java:com.sun.java.help.impl.JHSecondaryViewer\" > \n" + |
830 |
// "\t\t<param name=\"content\" value=\"2popup_gloss.html\" /> \n" + |
831 |
// "\t\t<param name=\"viewerActivator\" value=\"javax.help.LinkLabel\">\n" + |
832 |
// "\t\t<param name=\"viewerStyle\" value=\"javax.help.Popup\">\n" + |
833 |
// "\t\t<param name=\"viewerSize\" value=\"400,250\">\n" + |
834 |
// "\t\t<param name=\"text\" value=\"popup windows\">\n" + |
835 |
// "\t\t<param name=\"textColor\" value=\"blue\">\n" + |
836 |
// "\t\t<param name=\"viewerName\" value=\"2glossary\">\n" + |
837 |
// "\t</OBJECT>\n" + |
838 |
// "</html>"; |
839 |
// HTML html = new HTML(testHTML); |
840 |
// |
841 |
// Iterable<HTML.Object> oi = html.getObjects(); |
842 |
// for(HTML.Object e : oi) { |
843 |
// System.out.println("start="+e.getStart()); |
844 |
// System.out.println("end="+e.getEnd()); |
845 |
// System.out.println(e.getText()); |
846 |
// System.out.println("Attributes:"); |
847 |
// Iterable<HTML.Attribute> ai = e.getAttributes(); |
848 |
// for(HTML.Attribute a : ai) { |
849 |
// System.out.println(a.getText()); |
850 |
// System.out.println("name=["+ a.getName() + "] value=[" + a.getValue() + "]" ); |
851 |
// } |
852 |
// System.out.println("Parameters:"); |
853 |
// Iterable<HTML.Object.Param> pi = e.getParams(); |
854 |
// for(HTML.Object.Param p : pi) { |
855 |
// System.out.println(p.getText()); |
856 |
// // Attributes of the PARAM element:; |
857 |
// Iterable<HTML.Attribute> pai = p.getAttributes(); |
858 |
// for(HTML.Attribute pa : pai) { |
859 |
// String name=pa.getName(); // name of the attribute |
860 |
// String value=pa.getValue(); // value of the attribute |
861 |
// System.out.println("name=["+ name + "] value=[" + value + "]" ); |
862 |
// // ... |
863 |
// } |
864 |
// } |
865 |
// } |
866 |
} |
867 |
} |